{"cells":[{"metadata":{"id":"54F374ECDB8948F2808DFE6D28EC44FF","collapsed":false,"scrolled":true},"cell_type":"code","outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"Index(['label', 'title_sum', 'title_count', 'title_ctr',\n       'title_query_dot_similarity', 'title_query_wmd_dis', 'query_word_num',\n       'title_word_num', 'is_query_in_title', 'query_len', 'title_len',\n       'len_title-query', 'len_query/title', 'q_t_word_match_share',\n       'q_t_jaccard', 'q_t_common_words', 'q_t_total_unique_words',\n       'q_t_wc_diff', 'q_t_wc_ratio', 'q_t_wc_diff_unique',\n       'q_t_wc_ratio_unique', 'title_nunique_prefix', 'Levenshtein_dis',\n       'Levenshtein_ratio', 'Levenshtein_jaro', 'Levenshtein_jaro_winkler',\n       'longistStr_len', 'prefix_title_pos', 'prefix_title_relative_pos',\n       'len_title-query_word', 'len_query/title_word',\n       'title_query_leven_rate', 'lcseque_len', 'preifx_detail_title_pos',\n       'cityblock_sim', 'norm_similarity', 'cosine_similarity', '0_lsi',\n       '1_lsi', '2_lsi', '3_lsi', '4_lsi', '5_lsi', '6_lsi', '7_lsi', '8_lsi',\n       '9_lsi', '10_lsi', '11_lsi', '12_lsi', '13_lsi', '14_lsi', '15_lsi',\n       '16_lsi', '17_lsi', '18_lsi', '19_lsi'],\n      dtype='object')"},"execution_count":3}],"source":"train_data.columns","execution_count":3},{"metadata":{"id":"117C1440A0F44621A5B7A0FC5FA7DB57","collapsed":false,"scrolled":true},"cell_type":"code","outputs":[{"output_type":"stream","text":"(50000000, 57)\n['title_sum', 'title_count', 'title_ctr', 'title_query_dot_similarity', 'title_query_wmd_dis', 'query_word_num', 'title_word_num', 'is_query_in_title', 'query_len', 'title_len', 'len_title-query', 'len_query/title', 'q_t_word_match_share', 'q_t_jaccard', 'q_t_common_words', 'q_t_total_unique_words', 'q_t_wc_diff', 'q_t_wc_ratio', 'q_t_wc_diff_unique', 'q_t_wc_ratio_unique', 'title_nunique_prefix', 'Levenshtein_dis', 'Levenshtein_ratio', 'Levenshtein_jaro', 'Levenshtein_jaro_winkler', 'longistStr_len', 'prefix_title_pos', 'prefix_title_relative_pos', 'len_title-query_word', 'len_query/title_word', 'title_query_leven_rate', 'lcseque_len', 'preifx_detail_title_pos', 'cityblock_sim', 'norm_similarity', 'cosine_similarity', '0_lsi', '1_lsi', '2_lsi', '3_lsi', '4_lsi', '5_lsi', '6_lsi', '7_lsi', '8_lsi', '9_lsi', '10_lsi', '11_lsi', '12_lsi', '13_lsi', '14_lsi', '15_lsi', '16_lsi', '17_lsi', '18_lsi', '19_lsi']\n","name":"stdout"},{"output_type":"stream","text":"/opt/conda/lib/python3.6/site-packages/lightgbm/engine.py:147: UserWarning: Found `num_boost_round` in params. Will use it instead of argument\n  warnings.warn(\"Found `{}` in params. Will use it instead of argument\".format(alias))\n/opt/conda/lib/python3.6/site-packages/lightgbm/basic.py:755: UserWarning: silent keyword has been found in `params` and will be ignored.\nPlease use silent argument of the Dataset constructor to pass this parameter.\n  .format(key))\n","name":"stderr"},{"output_type":"stream","text":"[2]\tvalid_0's binary_logloss: 0.474023\n[4]\tvalid_0's binary_logloss: 0.473229\n[6]\tvalid_0's binary_logloss: 0.472439\n[8]\tvalid_0's binary_logloss: 0.471922\n[10]\tvalid_0's binary_logloss: 0.471173\n[12]\tvalid_0's binary_logloss: 0.470452\n[14]\tvalid_0's binary_logloss: 0.469755\n[16]\tvalid_0's binary_logloss: 0.469073\n[18]\tvalid_0's binary_logloss: 0.46848\n[20]\tvalid_0's binary_logloss: 0.468112\n[22]\tvalid_0's binary_logloss: 0.467884\n[24]\tvalid_0's binary_logloss: 0.467255\n[26]\tvalid_0's binary_logloss: 0.466705\n[28]\tvalid_0's binary_logloss: 0.466181\n[30]\tvalid_0's binary_logloss: 0.465653\n[32]\tvalid_0's binary_logloss: 0.465086\n[34]\tvalid_0's binary_logloss: 0.464536\n[36]\tvalid_0's binary_logloss: 0.464059\n[38]\tvalid_0's binary_logloss: 0.463591\n[40]\tvalid_0's binary_logloss: 0.463088\n[42]\tvalid_0's binary_logloss: 0.462601\n[44]\tvalid_0's binary_logloss: 0.462171\n[46]\tvalid_0's binary_logloss: 0.461703\n[48]\tvalid_0's binary_logloss: 0.461291\n[50]\tvalid_0's binary_logloss: 0.460851\n[52]\tvalid_0's binary_logloss: 0.460419\n[54]\tvalid_0's binary_logloss: 0.459998\n[56]\tvalid_0's binary_logloss: 0.459629\n[58]\tvalid_0's binary_logloss: 0.459229\n[60]\tvalid_0's binary_logloss: 0.458881\n[62]\tvalid_0's binary_logloss: 0.458536\n[64]\tvalid_0's binary_logloss: 0.458168\n[66]\tvalid_0's binary_logloss: 0.457842\n[68]\tvalid_0's binary_logloss: 0.457523\n[70]\tvalid_0's binary_logloss: 0.457183\n[72]\tvalid_0's binary_logloss: 0.456847\n[74]\tvalid_0's binary_logloss: 0.456521\n[76]\tvalid_0's binary_logloss: 0.456204\n[78]\tvalid_0's binary_logloss: 0.455898\n[80]\tvalid_0's binary_logloss: 0.455626\n[82]\tvalid_0's binary_logloss: 0.455358\n[84]\tvalid_0's binary_logloss: 0.455067\n[86]\tvalid_0's binary_logloss: 0.454786\n[88]\tvalid_0's binary_logloss: 0.454538\n[90]\tvalid_0's binary_logloss: 0.454272\n[92]\tvalid_0's binary_logloss: 0.45401\n[94]\tvalid_0's binary_logloss: 0.453777\n[96]\tvalid_0's binary_logloss: 0.453573\n[98]\tvalid_0's binary_logloss: 0.453331\n[100]\tvalid_0's binary_logloss: 0.453089\n[102]\tvalid_0's binary_logloss: 0.452854\n[104]\tvalid_0's binary_logloss: 0.452693\n[106]\tvalid_0's binary_logloss: 0.452466\n[108]\tvalid_0's binary_logloss: 0.45225\n[110]\tvalid_0's binary_logloss: 0.452035\n[112]\tvalid_0's binary_logloss: 0.451824\n[114]\tvalid_0's binary_logloss: 0.451619\n[116]\tvalid_0's binary_logloss: 0.451419\n[118]\tvalid_0's binary_logloss: 0.451243\n[120]\tvalid_0's binary_logloss: 0.451052\n[122]\tvalid_0's binary_logloss: 0.450868\n[124]\tvalid_0's binary_logloss: 0.450685\n[126]\tvalid_0's binary_logloss: 0.450506\n[128]\tvalid_0's binary_logloss: 0.450352\n[130]\tvalid_0's binary_logloss: 0.450182\n[132]\tvalid_0's binary_logloss: 0.450014\n[134]\tvalid_0's binary_logloss: 0.449854\n[136]\tvalid_0's binary_logloss: 0.449735\n[138]\tvalid_0's binary_logloss: 0.449579\n[140]\tvalid_0's binary_logloss: 0.449427\n[142]\tvalid_0's binary_logloss: 0.449279\n[144]\tvalid_0's binary_logloss: 0.449148\n[146]\tvalid_0's binary_logloss: 0.449008\n[148]\tvalid_0's binary_logloss: 0.448901\n[150]\tvalid_0's binary_logloss: 0.448765\n[152]\tvalid_0's binary_logloss: 0.448631\n[154]\tvalid_0's binary_logloss: 0.448531\n[156]\tvalid_0's binary_logloss: 0.448433\n[158]\tvalid_0's binary_logloss: 0.448305\n[160]\tvalid_0's binary_logloss: 0.44818\n[162]\tvalid_0's binary_logloss: 0.448069\n[164]\tvalid_0's binary_logloss: 0.44795\n[166]\tvalid_0's binary_logloss: 0.447833\n[168]\tvalid_0's binary_logloss: 0.447718\n[170]\tvalid_0's binary_logloss: 0.447606\n[172]\tvalid_0's binary_logloss: 0.447498\n[174]\tvalid_0's binary_logloss: 0.447391\n[176]\tvalid_0's binary_logloss: 0.447287\n[178]\tvalid_0's binary_logloss: 0.447194\n[180]\tvalid_0's binary_logloss: 0.447103\n[182]\tvalid_0's binary_logloss: 0.447004\n[184]\tvalid_0's binary_logloss: 0.446906\n[186]\tvalid_0's binary_logloss: 0.446813\n[188]\tvalid_0's binary_logloss: 0.446721\n[190]\tvalid_0's binary_logloss: 0.446626\n[192]\tvalid_0's binary_logloss: 0.446547\n[194]\tvalid_0's binary_logloss: 0.446476\n[196]\tvalid_0's binary_logloss: 0.446387\n[198]\tvalid_0's binary_logloss: 0.446304\n[200]\tvalid_0's binary_logloss: 0.44622\n[202]\tvalid_0's binary_logloss: 0.44614\n[204]\tvalid_0's binary_logloss: 0.446062\n[206]\tvalid_0's binary_logloss: 0.44599\n[208]\tvalid_0's binary_logloss: 0.445913\n[210]\tvalid_0's binary_logloss: 0.445845\n[212]\tvalid_0's binary_logloss: 0.445774\n[214]\tvalid_0's binary_logloss: 0.44571\n[216]\tvalid_0's binary_logloss: 0.445659\n[218]\tvalid_0's binary_logloss: 0.44559\n[220]\tvalid_0's binary_logloss: 0.44552\n[222]\tvalid_0's binary_logloss: 0.445453\n[224]\tvalid_0's binary_logloss: 0.445385\n[226]\tvalid_0's binary_logloss: 0.445332\n[228]\tvalid_0's binary_logloss: 0.445274\n[230]\tvalid_0's binary_logloss: 0.445219\n[232]\tvalid_0's binary_logloss: 0.445156\n[234]\tvalid_0's binary_logloss: 0.445101\n[236]\tvalid_0's binary_logloss: 0.445046\n[238]\tvalid_0's binary_logloss: 0.444988\n[240]\tvalid_0's binary_logloss: 0.44493\n[242]\tvalid_0's binary_logloss: 0.444874\n[244]\tvalid_0's binary_logloss: 0.444822\n[246]\tvalid_0's binary_logloss: 0.444768\n[248]\tvalid_0's binary_logloss: 0.444715\n[250]\tvalid_0's binary_logloss: 0.444661\n[252]\tvalid_0's binary_logloss: 0.44461\n[254]\tvalid_0's binary_logloss: 0.444565\n[256]\tvalid_0's binary_logloss: 0.444516\n[258]\tvalid_0's binary_logloss: 0.444466\n[260]\tvalid_0's binary_logloss: 0.444431\n[262]\tvalid_0's binary_logloss: 0.444384\n[264]\tvalid_0's binary_logloss: 0.44434\n[266]\tvalid_0's binary_logloss: 0.4443\n[268]\tvalid_0's binary_logloss: 0.444258\n[270]\tvalid_0's binary_logloss: 0.444221\n[272]\tvalid_0's binary_logloss: 0.444177\n[274]\tvalid_0's binary_logloss: 0.444135\n[276]\tvalid_0's binary_logloss: 0.444093\n[278]\tvalid_0's binary_logloss: 0.444053\n[280]\tvalid_0's binary_logloss: 0.444014\n[282]\tvalid_0's binary_logloss: 0.44398\n[284]\tvalid_0's binary_logloss: 0.443939\n[286]\tvalid_0's binary_logloss: 0.443902\n[288]\tvalid_0's binary_logloss: 0.443864\n[290]\tvalid_0's binary_logloss: 0.443826\n[292]\tvalid_0's binary_logloss: 0.443789\n[294]\tvalid_0's binary_logloss: 0.44375\n[296]\tvalid_0's binary_logloss: 0.443715\n[298]\tvalid_0's binary_logloss: 0.443679\n[300]\tvalid_0's binary_logloss: 0.443644\n[302]\tvalid_0's binary_logloss: 0.443616\n[304]\tvalid_0's binary_logloss: 0.443584\n[306]\tvalid_0's binary_logloss: 0.443553\n[308]\tvalid_0's binary_logloss: 0.443524\n[310]\tvalid_0's binary_logloss: 0.443494\n[312]\tvalid_0's binary_logloss: 0.443462\n[314]\tvalid_0's binary_logloss: 0.443431\n[316]\tvalid_0's binary_logloss: 0.443405\n[318]\tvalid_0's binary_logloss: 0.443376\n[320]\tvalid_0's binary_logloss: 0.443345\n[322]\tvalid_0's binary_logloss: 0.443317\n[324]\tvalid_0's binary_logloss: 0.443289\n[326]\tvalid_0's binary_logloss: 0.443261\n[328]\tvalid_0's binary_logloss: 0.443238\n[330]\tvalid_0's binary_logloss: 0.443211\n[332]\tvalid_0's binary_logloss: 0.443186\n[334]\tvalid_0's binary_logloss: 0.443158\n[336]\tvalid_0's binary_logloss: 0.443132\n[338]\tvalid_0's binary_logloss: 0.443106\n[340]\tvalid_0's binary_logloss: 0.443082\n[342]\tvalid_0's binary_logloss: 0.443056\n[344]\tvalid_0's binary_logloss: 0.44303\n[346]\tvalid_0's binary_logloss: 0.443008\n[348]\tvalid_0's binary_logloss: 0.442985\n[350]\tvalid_0's binary_logloss: 0.442961\n[352]\tvalid_0's binary_logloss: 0.442938\n[354]\tvalid_0's binary_logloss: 0.442915\n[356]\tvalid_0's binary_logloss: 0.442894\n[358]\tvalid_0's binary_logloss: 0.442873\n[360]\tvalid_0's binary_logloss: 0.442855\n[362]\tvalid_0's binary_logloss: 0.442834\n[364]\tvalid_0's binary_logloss: 0.442815\n[366]\tvalid_0's binary_logloss: 0.442792\n[368]\tvalid_0's binary_logloss: 0.442772\n[370]\tvalid_0's binary_logloss: 0.442752\n[372]\tvalid_0's binary_logloss: 0.442732\n[374]\tvalid_0's binary_logloss: 0.442712\n[376]\tvalid_0's binary_logloss: 0.442693\n[378]\tvalid_0's binary_logloss: 0.442675\n[380]\tvalid_0's binary_logloss: 0.442656\n[382]\tvalid_0's binary_logloss: 0.442636\n[384]\tvalid_0's binary_logloss: 0.442618\n[386]\tvalid_0's binary_logloss: 0.4426\n[388]\tvalid_0's binary_logloss: 0.442581\n[390]\tvalid_0's binary_logloss: 0.442563\n[392]\tvalid_0's binary_logloss: 0.442546\n[394]\tvalid_0's binary_logloss: 0.442529\n[396]\tvalid_0's binary_logloss: 0.442511\n[398]\tvalid_0's binary_logloss: 0.442494\n[400]\tvalid_0's binary_logloss: 0.442476\n[402]\tvalid_0's binary_logloss: 0.442459\n[404]\tvalid_0's binary_logloss: 0.442444\n[406]\tvalid_0's binary_logloss: 0.442427\n[408]\tvalid_0's binary_logloss: 0.44241\n[410]\tvalid_0's binary_logloss: 0.442395\n[412]\tvalid_0's binary_logloss: 0.442379\n[414]\tvalid_0's binary_logloss: 0.442363\n[416]\tvalid_0's binary_logloss: 0.442348\n[418]\tvalid_0's binary_logloss: 0.442332\n[420]\tvalid_0's binary_logloss: 0.442317\n[422]\tvalid_0's binary_logloss: 0.442301\n[424]\tvalid_0's binary_logloss: 0.442285\n[426]\tvalid_0's binary_logloss: 0.442271\n[428]\tvalid_0's binary_logloss: 0.442258\n[430]\tvalid_0's binary_logloss: 0.442246\n[432]\tvalid_0's binary_logloss: 0.442232\n[434]\tvalid_0's binary_logloss: 0.442218\n[436]\tvalid_0's binary_logloss: 0.442205\n[438]\tvalid_0's binary_logloss: 0.44219\n[440]\tvalid_0's binary_logloss: 0.442176\n[442]\tvalid_0's binary_logloss: 0.442164\n[444]\tvalid_0's binary_logloss: 0.442151\n[446]\tvalid_0's binary_logloss: 0.442138\n[448]\tvalid_0's binary_logloss: 0.442125\n[450]\tvalid_0's binary_logloss: 0.442112\n[452]\tvalid_0's binary_logloss: 0.442099\n[454]\tvalid_0's binary_logloss: 0.442087\n[456]\tvalid_0's binary_logloss: 0.442073\n[458]\tvalid_0's binary_logloss: 0.442062\n[460]\tvalid_0's binary_logloss: 0.44205\n[462]\tvalid_0's binary_logloss: 0.442038\n[464]\tvalid_0's binary_logloss: 0.442025\n[466]\tvalid_0's binary_logloss: 0.442014\n[468]\tvalid_0's binary_logloss: 0.442003\n[470]\tvalid_0's binary_logloss: 0.441993\n[472]\tvalid_0's binary_logloss: 0.441982\n[474]\tvalid_0's binary_logloss: 0.44197\n[476]\tvalid_0's binary_logloss: 0.441958\n[478]\tvalid_0's binary_logloss: 0.441947\n[480]\tvalid_0's binary_logloss: 0.441936\n[482]\tvalid_0's binary_logloss: 0.441924\n[484]\tvalid_0's binary_logloss: 0.441913\n[486]\tvalid_0's binary_logloss: 0.441904\n[488]\tvalid_0's binary_logloss: 0.441893\n[490]\tvalid_0's binary_logloss: 0.441883\n[492]\tvalid_0's binary_logloss: 0.441872\n[494]\tvalid_0's binary_logloss: 0.441862\n[496]\tvalid_0's binary_logloss: 0.441853\n[498]\tvalid_0's binary_logloss: 0.441844\n[500]\tvalid_0's binary_logloss: 0.441834\n[502]\tvalid_0's binary_logloss: 0.441824\n[504]\tvalid_0's binary_logloss: 0.441815\n[506]\tvalid_0's binary_logloss: 0.441806\n[508]\tvalid_0's binary_logloss: 0.441797\n[510]\tvalid_0's binary_logloss: 0.441788\n[512]\tvalid_0's binary_logloss: 0.441777\n[514]\tvalid_0's binary_logloss: 0.441769\n[516]\tvalid_0's binary_logloss: 0.44176\n[518]\tvalid_0's binary_logloss: 0.44175\n[520]\tvalid_0's binary_logloss: 0.44174\n[522]\tvalid_0's binary_logloss: 0.441732\n[524]\tvalid_0's binary_logloss: 0.441724\n[526]\tvalid_0's binary_logloss: 0.441716\n[528]\tvalid_0's binary_logloss: 0.441708\n[530]\tvalid_0's binary_logloss: 0.4417\n[532]\tvalid_0's binary_logloss: 0.441692\n[534]\tvalid_0's binary_logloss: 0.441683\n[536]\tvalid_0's binary_logloss: 0.441674\n[538]\tvalid_0's binary_logloss: 0.441665\n[540]\tvalid_0's binary_logloss: 0.441656\n[542]\tvalid_0's binary_logloss: 0.441648\n[544]\tvalid_0's binary_logloss: 0.441641\n[546]\tvalid_0's binary_logloss: 0.441633\n[548]\tvalid_0's binary_logloss: 0.441625\n[550]\tvalid_0's binary_logloss: 0.441617\n[552]\tvalid_0's binary_logloss: 0.441608\n[554]\tvalid_0's binary_logloss: 0.441601\n[556]\tvalid_0's binary_logloss: 0.441592\n[558]\tvalid_0's binary_logloss: 0.441583\n[560]\tvalid_0's binary_logloss: 0.441575\n[562]\tvalid_0's binary_logloss: 0.441566\n[564]\tvalid_0's binary_logloss: 0.441557\n[566]\tvalid_0's binary_logloss: 0.44155\n[568]\tvalid_0's binary_logloss: 0.441541\n[570]\tvalid_0's binary_logloss: 0.441534\n[572]\tvalid_0's binary_logloss: 0.441527\n[574]\tvalid_0's binary_logloss: 0.441518\n[576]\tvalid_0's binary_logloss: 0.44151\n[578]\tvalid_0's binary_logloss: 0.441503\n[580]\tvalid_0's binary_logloss: 0.441495\n[582]\tvalid_0's binary_logloss: 0.441487\n[584]\tvalid_0's binary_logloss: 0.44148\n[586]\tvalid_0's binary_logloss: 0.441474\n[588]\tvalid_0's binary_logloss: 0.441467\n[590]\tvalid_0's binary_logloss: 0.441459\n[592]\tvalid_0's binary_logloss: 0.441453\n[594]\tvalid_0's binary_logloss: 0.441445\n[596]\tvalid_0's binary_logloss: 0.441439\n[598]\tvalid_0's binary_logloss: 0.441431\n[600]\tvalid_0's binary_logloss: 0.441424\n[602]\tvalid_0's binary_logloss: 0.441417\n[604]\tvalid_0's binary_logloss: 0.44141\n[606]\tvalid_0's binary_logloss: 0.441402\n[608]\tvalid_0's binary_logloss: 0.441396\n[610]\tvalid_0's binary_logloss: 0.44139\n[612]\tvalid_0's binary_logloss: 0.441383\n[614]\tvalid_0's binary_logloss: 0.441377\n[616]\tvalid_0's binary_logloss: 0.441371\n[618]\tvalid_0's binary_logloss: 0.441363\n[620]\tvalid_0's binary_logloss: 0.441356\n[622]\tvalid_0's binary_logloss: 0.44135\n[624]\tvalid_0's binary_logloss: 0.441344\n[626]\tvalid_0's binary_logloss: 0.441337\n[628]\tvalid_0's binary_logloss: 0.44133\n[630]\tvalid_0's binary_logloss: 0.441325\n[632]\tvalid_0's binary_logloss: 0.441318\n[634]\tvalid_0's binary_logloss: 0.441311\n[636]\tvalid_0's binary_logloss: 0.441304\n[638]\tvalid_0's binary_logloss: 0.441299\n[640]\tvalid_0's binary_logloss: 0.441293\n[642]\tvalid_0's binary_logloss: 0.441288\n[644]\tvalid_0's binary_logloss: 0.441282\n[646]\tvalid_0's binary_logloss: 0.441276\n[648]\tvalid_0's binary_logloss: 0.441271\n[650]\tvalid_0's binary_logloss: 0.441264\n[652]\tvalid_0's binary_logloss: 0.441259\n[654]\tvalid_0's binary_logloss: 0.441251\n[656]\tvalid_0's binary_logloss: 0.441245\n[658]\tvalid_0's binary_logloss: 0.44124\n[660]\tvalid_0's binary_logloss: 0.441233\n[662]\tvalid_0's binary_logloss: 0.441227\n[664]\tvalid_0's binary_logloss: 0.44122\n[666]\tvalid_0's binary_logloss: 0.441214\n[668]\tvalid_0's binary_logloss: 0.441208\n[670]\tvalid_0's binary_logloss: 0.441204\n[672]\tvalid_0's binary_logloss: 0.441199\n[674]\tvalid_0's binary_logloss: 0.441192\n[676]\tvalid_0's binary_logloss: 0.441186\n[678]\tvalid_0's binary_logloss: 0.44118\n[680]\tvalid_0's binary_logloss: 0.441174\n[682]\tvalid_0's binary_logloss: 0.441169\n[684]\tvalid_0's binary_logloss: 0.441163\n[686]\tvalid_0's binary_logloss: 0.441157\n[688]\tvalid_0's binary_logloss: 0.441152\n[690]\tvalid_0's binary_logloss: 0.441147\n[692]\tvalid_0's binary_logloss: 0.441142\n[694]\tvalid_0's binary_logloss: 0.441136\n[696]\tvalid_0's binary_logloss: 0.441132\n[698]\tvalid_0's binary_logloss: 0.441126\n[700]\tvalid_0's binary_logloss: 0.441121\n[702]\tvalid_0's binary_logloss: 0.441116\n[704]\tvalid_0's binary_logloss: 0.441111\n[706]\tvalid_0's binary_logloss: 0.441105\n[708]\tvalid_0's binary_logloss: 0.4411\n[710]\tvalid_0's binary_logloss: 0.441095\n[712]\tvalid_0's binary_logloss: 0.441089\n[714]\tvalid_0's binary_logloss: 0.441083\n[716]\tvalid_0's binary_logloss: 0.441077\n[718]\tvalid_0's binary_logloss: 0.441072\n[720]\tvalid_0's binary_logloss: 0.441067\n[722]\tvalid_0's binary_logloss: 0.441061\n[724]\tvalid_0's binary_logloss: 0.441055\n[726]\tvalid_0's binary_logloss: 0.44105\n[728]\tvalid_0's binary_logloss: 0.441045\n[730]\tvalid_0's binary_logloss: 0.441039\n[732]\tvalid_0's binary_logloss: 0.441034\n[734]\tvalid_0's binary_logloss: 0.441029\n[736]\tvalid_0's binary_logloss: 0.441025\n[738]\tvalid_0's binary_logloss: 0.44102\n[740]\tvalid_0's binary_logloss: 0.441015\n[742]\tvalid_0's binary_logloss: 0.44101\n[744]\tvalid_0's binary_logloss: 0.441006\n[746]\tvalid_0's binary_logloss: 0.441001\n[748]\tvalid_0's binary_logloss: 0.440996\n[750]\tvalid_0's binary_logloss: 0.44099\n[752]\tvalid_0's binary_logloss: 0.440985\n[754]\tvalid_0's binary_logloss: 0.440981\n[756]\tvalid_0's binary_logloss: 0.440976\n[758]\tvalid_0's binary_logloss: 0.440971\n[760]\tvalid_0's binary_logloss: 0.440967\n[762]\tvalid_0's binary_logloss: 0.440962\n[764]\tvalid_0's binary_logloss: 0.440958\n[766]\tvalid_0's binary_logloss: 0.440952\n[768]\tvalid_0's binary_logloss: 0.440948\n[770]\tvalid_0's binary_logloss: 0.440943\n[772]\tvalid_0's binary_logloss: 0.440938\n[774]\tvalid_0's binary_logloss: 0.440934\n[776]\tvalid_0's binary_logloss: 0.44093\n[778]\tvalid_0's binary_logloss: 0.440925\n[780]\tvalid_0's binary_logloss: 0.440921\n[782]\tvalid_0's binary_logloss: 0.440917\n[784]\tvalid_0's binary_logloss: 0.440912\n[786]\tvalid_0's binary_logloss: 0.440907\n[788]\tvalid_0's binary_logloss: 0.440903\n[790]\tvalid_0's binary_logloss: 0.440899\n[792]\tvalid_0's binary_logloss: 0.440894\n[794]\tvalid_0's binary_logloss: 0.440889\n[796]\tvalid_0's binary_logloss: 0.440885\n[798]\tvalid_0's binary_logloss: 0.440881\n[800]\tvalid_0's binary_logloss: 0.440877\n[802]\tvalid_0's binary_logloss: 0.440873\n[804]\tvalid_0's binary_logloss: 0.440868\n[806]\tvalid_0's binary_logloss: 0.440864\n[808]\tvalid_0's binary_logloss: 0.440861\n[810]\tvalid_0's binary_logloss: 0.440857\n[812]\tvalid_0's binary_logloss: 0.440853\n[814]\tvalid_0's binary_logloss: 0.440849\n[816]\tvalid_0's binary_logloss: 0.440844\n[818]\tvalid_0's binary_logloss: 0.44084\n[820]\tvalid_0's binary_logloss: 0.440837\n[822]\tvalid_0's binary_logloss: 0.440833\n[824]\tvalid_0's binary_logloss: 0.440828\n[826]\tvalid_0's binary_logloss: 0.440825\n[828]\tvalid_0's binary_logloss: 0.440819\n[830]\tvalid_0's binary_logloss: 0.440816\n[832]\tvalid_0's binary_logloss: 0.440811\n[834]\tvalid_0's binary_logloss: 0.440807\n[836]\tvalid_0's binary_logloss: 0.440802\n[838]\tvalid_0's binary_logloss: 0.440799\n[840]\tvalid_0's binary_logloss: 0.440795\n[842]\tvalid_0's binary_logloss: 0.44079\n[844]\tvalid_0's binary_logloss: 0.440787\n[846]\tvalid_0's binary_logloss: 0.440784\n[848]\tvalid_0's binary_logloss: 0.440779\n[850]\tvalid_0's binary_logloss: 0.440775\n[852]\tvalid_0's binary_logloss: 0.440772\n[854]\tvalid_0's binary_logloss: 0.440768\n[856]\tvalid_0's binary_logloss: 0.440765\n[858]\tvalid_0's binary_logloss: 0.440759\n[860]\tvalid_0's binary_logloss: 0.440755\n[862]\tvalid_0's binary_logloss: 0.440752\n[864]\tvalid_0's binary_logloss: 0.440748\n[866]\tvalid_0's binary_logloss: 0.440745\n[868]\tvalid_0's binary_logloss: 0.440741\n[870]\tvalid_0's binary_logloss: 0.440735\n[872]\tvalid_0's binary_logloss: 0.44073\n[874]\tvalid_0's binary_logloss: 0.440725\n[876]\tvalid_0's binary_logloss: 0.440721\n[878]\tvalid_0's binary_logloss: 0.440718\n[880]\tvalid_0's binary_logloss: 0.440713\n[882]\tvalid_0's binary_logloss: 0.440709\n[884]\tvalid_0's binary_logloss: 0.440705\n[886]\tvalid_0's binary_logloss: 0.440702\n[888]\tvalid_0's binary_logloss: 0.440698\n[890]\tvalid_0's binary_logloss: 0.440695\n[892]\tvalid_0's binary_logloss: 0.440692\n[894]\tvalid_0's binary_logloss: 0.440688\n[896]\tvalid_0's binary_logloss: 0.440684\n[898]\tvalid_0's binary_logloss: 0.44068\n[900]\tvalid_0's binary_logloss: 0.440676\n[902]\tvalid_0's binary_logloss: 0.440673\n[904]\tvalid_0's binary_logloss: 0.440669\n[906]\tvalid_0's binary_logloss: 0.440666\n[908]\tvalid_0's binary_logloss: 0.440662\n[910]\tvalid_0's binary_logloss: 0.440658\n[912]\tvalid_0's binary_logloss: 0.440653\n[914]\tvalid_0's binary_logloss: 0.44065\n[916]\tvalid_0's binary_logloss: 0.440646\n[918]\tvalid_0's binary_logloss: 0.440643\n[920]\tvalid_0's binary_logloss: 0.440639\n[922]\tvalid_0's binary_logloss: 0.440635\n[924]\tvalid_0's binary_logloss: 0.440631\n[926]\tvalid_0's binary_logloss: 0.440628\n[928]\tvalid_0's binary_logloss: 0.440625\n[930]\tvalid_0's binary_logloss: 0.440621\n[932]\tvalid_0's binary_logloss: 0.440618\n[934]\tvalid_0's binary_logloss: 0.440614\n[936]\tvalid_0's binary_logloss: 0.440611\n[938]\tvalid_0's binary_logloss: 0.440608\n[940]\tvalid_0's binary_logloss: 0.440605\n[942]\tvalid_0's binary_logloss: 0.440602\n[944]\tvalid_0's binary_logloss: 0.440599\n[946]\tvalid_0's binary_logloss: 0.440596\n[948]\tvalid_0's binary_logloss: 0.440592\n[950]\tvalid_0's binary_logloss: 0.440588\n[952]\tvalid_0's binary_logloss: 0.440585\n[954]\tvalid_0's binary_logloss: 0.440582\n[956]\tvalid_0's binary_logloss: 0.440579\n[958]\tvalid_0's binary_logloss: 0.440576\n[960]\tvalid_0's binary_logloss: 0.440573\n[962]\tvalid_0's binary_logloss: 0.44057\n[964]\tvalid_0's binary_logloss: 0.440565\n[966]\tvalid_0's binary_logloss: 0.440563\n[968]\tvalid_0's binary_logloss: 0.440561\n[970]\tvalid_0's binary_logloss: 0.440557\n[972]\tvalid_0's binary_logloss: 0.440553\n[974]\tvalid_0's binary_logloss: 0.440551\n[976]\tvalid_0's binary_logloss: 0.440548\n[978]\tvalid_0's binary_logloss: 0.440544\n[980]\tvalid_0's binary_logloss: 0.440541\n[982]\tvalid_0's binary_logloss: 0.440538\n[984]\tvalid_0's binary_logloss: 0.440536\n[986]\tvalid_0's binary_logloss: 0.440533\n[988]\tvalid_0's binary_logloss: 0.44053\n[990]\tvalid_0's binary_logloss: 0.440527\n[992]\tvalid_0's binary_logloss: 0.440524\n[994]\tvalid_0's binary_logloss: 0.440522\n[996]\tvalid_0's binary_logloss: 0.440519\n[998]\tvalid_0's binary_logloss: 0.440516\n[1000]\tvalid_0's binary_logloss: 0.440512\n","name":"stdout"}],"source":"import pandas as pd\r\nimport numpy as np\r\nimport pandas as pd\r\nimport time\r\nimport sys\r\nimport datetime\r\nimport gc\r\nfrom sklearn.model_selection import StratifiedShuffleSplit\r\nfrom sklearn.model_selection import KFold, cross_val_score, train_test_split\r\nfrom sklearn.model_selection import StratifiedKFold\r\nfrom sklearn.metrics import roc_auc_score, log_loss\r\nimport lightgbm as lgb\r\nfrom sklearn.preprocessing import OneHotEncoder, LabelEncoder\r\nfrom sklearn.feature_extraction.text import CountVectorizer\r\nfrom sklearn.feature_selection import chi2, SelectPercentile\r\nimport math\r\nfrom sklearn.metrics import f1_score\r\nimport jieba\r\nimport jieba.posseg as psg\r\nfrom collections import Counter\r\nimport functools\r\nfrom time import time\r\nfrom sklearn import preprocessing\r\n\r\nimport pandas as pd\r\nfrom scipy import sparse\r\nfrom sklearn.model_selection import train_test_split, StratifiedKFold\r\nimport lightgbm as lgb\r\nfrom sklearn import metrics\r\nimport os, time, datetime\r\nimport numpy as np\r\nfrom sklearn import preprocessing\r\n\r\n\r\ndef norm(train_df,features):    \r\n    scaler = preprocessing.QuantileTransformer(random_state=0) \r\n    scaler.fit(train_df[features])  \r\n    train_df[features]=scaler.transform(train_df[features]) \r\n\r\ntrain_data=pd.read_pickle(\"/home/kesci/train_data9-9.5.pickle\")\r\nprint(train_data.shape)\r\n\r\n# norm(train_data,['title_count','title_sum','title_nunique_prefix'])\r\n\r\n# test_data=pd.read_pickle(\"/home/kesci/work/first_zzp/test_feature_first_no_query_and_title_data.pickle\")\r\nfea=list(train_data.columns)\r\n# print(fea)\r\nfea.remove('label')\r\n# fea.remove('query_title_types')\r\n\r\ndrop_list=[ ]\r\nfor i in drop_list:########删除一些不参与训练的特征\r\n    fea.remove(i)\r\n    \r\nprint(fea)\r\nimport numpy as np\r\nfor i in train_data:#########因为当时5千万数据，所有特征没办法直接加载到内存，需要降低内存消耗\r\n    if isinstance(train_data[i],np.float64):\r\n        train_data[i]=train_data[i].astype(np.float32)\r\n    if isinstance(train_data[i],np.int64):\r\n        train_data[i]=train_data[i].astype(np.int16)\r\n\r\n\r\n#############此处划分训练集的方式，建议改成 DataFrame.sample()\r\ndata_split=StratifiedShuffleSplit(n_splits=2,test_size=0.002,random_state=666)\r\ntrain_index,vaild_index=data_split.split(train_data['label'],train_data['label']).__next__()\r\nlgb_train = lgb.Dataset(train_data[fea], train_data['label'])\r\nlgb_eval = lgb.Dataset(train_data[fea].iloc[vaild_index], train_data['label'].iloc[vaild_index], reference=lgb_train)\r\n\r\ndel train_data\r\nimport gc\r\ngc.collect()\r\nparams = {\r\n        'boosting_type':'gbdt', 'num_leaves':63, 'max_depth':-1, 'n_estimators':1200, 'objective':'binary',\r\n        'subsample':0.8, 'colsample_bytree':0.8, 'subsample_freq':1,'num_boost_round':1000,\r\n        'learning_rate':0.007, 'random_state':666, 'silent':False,'verbose':1,\r\n        'reg_alpha':0.0,'reg_lambda':1,'feature_fraction':0.8,'min_child_weight':50\r\n}\r\n# print(\"start2\")\r\ngbm = lgb.train(params,\r\n                lgb_train,\r\n                valid_sets=lgb_eval,\r\n                verbose_eval=2,\r\n                #  init_model='/home/kesci/work/first_zzp/model/no_title_newn_norm.txt'\r\n                )","execution_count":1},{"metadata":{"id":"3FBF9AC464A84795803B49A44D18A9CE","collapsed":false,"scrolled":true},"cell_type":"code","outputs":[{"output_type":"stream","text":"title_sum : 2849\ntitle_count : 2281\ntitle_ctr : 5588\ntitle_query_dot_similarity : 2612\ntitle_query_wmd_dis : 862\nquery_word_num : 1470\ntitle_word_num : 298\nis_query_in_title : 71\nquery_len : 3078\ntitle_len : 277\nlen_title-query : 266\nlen_query/title : 1351\nq_t_word_match_share : 2963\nq_t_jaccard : 605\nq_t_common_words : 3027\nq_t_total_unique_words : 164\nq_t_wc_diff : 130\nq_t_wc_ratio : 678\nq_t_wc_diff_unique : 707\nq_t_wc_ratio_unique : 1931\ntitle_nunique_prefix : 1691\nLevenshtein_dis : 119\nLevenshtein_ratio : 328\nLevenshtein_jaro : 573\nLevenshtein_jaro_winkler : 2013\nlongistStr_len : 2859\nprefix_title_pos : 517\nprefix_title_relative_pos : 636\nlen_title-query_word : 225\nlen_query/title_word : 403\ntitle_query_leven_rate : 699\nlcseque_len : 804\npreifx_detail_title_pos : 41\ncityblock_sim : 908\nnorm_similarity : 1491\ncosine_similarity : 6565\n0_lsi : 1838\n1_lsi : 540\n2_lsi : 1663\n3_lsi : 212\n4_lsi : 669\n5_lsi : 185\n6_lsi : 871\n7_lsi : 515\n8_lsi : 281\n9_lsi : 998\n10_lsi : 222\n11_lsi : 794\n12_lsi : 105\n13_lsi : 320\n14_lsi : 337\n15_lsi : 119\n16_lsi : 303\n17_lsi : 206\n18_lsi : 349\n19_lsi : 393\n","name":"stdout"}],"source":"for index,i in enumerate(fea):\r\n    print(i,\":\",gbm.feature_importance()[index])","execution_count":2},{"metadata":{"id":"F013017B5EA149628DE11A9345AB700C","mdEditEnable":false},"cell_type":"markdown","source":"# 保存预训练模型"},{"metadata":{"id":"CABB743E0B2B4132A9261FBB1D131D3A","collapsed":false,"scrolled":false},"cell_type":"code","outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"<lightgbm.basic.Booster at 0x7fc1bc9fedd8>"},"execution_count":4}],"source":"gbm.save_model('/home/kesci/work/first_zzp/model/lgb1200_w2vchange.txt')","execution_count":4},{"metadata":{"id":"BAE257B25AD6448A93189B0D16C93B2A","mdEditEnable":false},"cell_type":"markdown","source":"# 下面这段代码是加载预训练lgb模型"},{"metadata":{"id":"C6373644E8E64D758664A573943FF945","collapsed":false,"scrolled":false},"cell_type":"code","outputs":[],"source":"import lightgbm as lgb\ngbm=lgb.Booster(model_file='/home/kesci/work/first_zzp/model/lgb1200.txt')","execution_count":1},{"metadata":{"id":"92E25A7DB45145BC9B5D1E5FA0E1B96E","mdEditEnable":false},"cell_type":"markdown","source":"# 下面是加载测试集"},{"metadata":{"id":"7BA386C1E83340049438D980CB246F0A","collapsed":true,"scrolled":false},"cell_type":"code","outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"             title_sum   title_count  title_ctr  title_query_dot_similarity  \\\n0                  NaN           NaN        NaN                    2.467954   \n1         5.585586e-01  5.590591e-01   0.238788                    3.494070   \n2         2.162162e-01  1.826827e-01   0.192738                    2.843601   \n3         8.588589e-01  7.862863e-01   0.445409                    2.588134   \n4                  NaN           NaN        NaN                    2.054980   \n5         1.000000e-07  6.806807e-02   0.044410                    3.042946   \n6         5.855856e-01  4.819820e-01   0.372021                    2.958290   \n7         7.507508e-01  6.861862e-01   0.361941                    2.845859   \n8         8.523524e-01  8.303303e-01   0.326117                    2.922247   \n9                  NaN           NaN        NaN                    3.120164   \n10        7.952953e-01  8.678679e-01   0.168810                    4.774547   \n11        8.523524e-01  7.467467e-01   0.524060                    2.029088   \n12        7.317317e-01  6.741742e-01   0.343601                    2.032057   \n13        8.128128e-01  6.861862e-01   0.532255                    1.857624   \n14                 NaN           NaN        NaN                    3.405394   \n15        3.173173e-01  2.667668e-01   0.240432                    4.849490   \n16        3.173173e-01  6.806807e-02   0.747746                    3.451626   \n17        9.339339e-01  9.499499e-01   0.241851                    8.938723   \n18        9.369369e-01  9.497497e-01   0.254081                    9.772754   \n19        7.597598e-01  7.802803e-01   0.237216                    5.266338   \n20        7.207207e-01  8.203203e-01   0.150934                    1.828810   \n21                 NaN           NaN        NaN                    0.830332   \n22        8.128128e-01  7.842843e-01   0.323873                    1.312244   \n23        8.183183e-01  7.647648e-01   0.374164                    3.183734   \n24        8.878879e-01  8.613614e-01   0.353391                    4.587874   \n25        7.507508e-01  5.150150e-01   0.769868                    3.947191   \n26        8.648649e-01  8.043043e-01   0.424667                    3.941643   \n27        9.514515e-01  9.503504e-01   0.311929                    4.658513   \n28        7.082082e-01  6.311311e-01   0.370347                    3.669347   \n29        8.278278e-01  7.962963e-01   0.335255                    4.505778   \n...                ...           ...        ...                         ...   \n19999970  9.359359e-01  8.958959e-01   0.458500                    2.598250   \n19999971  7.597598e-01  6.366366e-01   0.480988                    2.529526   \n19999972  2.162162e-01  1.826827e-01   0.192738                    3.740707   \n19999973  3.173173e-01  3.133133e-01   0.196087                    3.287300   \n19999974  2.162162e-01  3.523524e-01   0.087692                    4.147095   \n19999975  5.265265e-01  6.021021e-01   0.170917                    2.822454   \n19999976  5.265265e-01  4.029029e-01   0.386673                    3.907083   \n19999977  6.946947e-01  7.597598e-01   0.186255                    3.991168   \n19999978  4.439439e-01  4.704705e-01   0.197964                    3.470595   \n19999979  6.091091e-01  5.895896e-01   0.269661                    3.878566   \n19999980           NaN           NaN        NaN                    3.119522   \n19999981  2.162162e-01  1.481481e-01   0.232531                    4.370203   \n19999982           NaN           NaN        NaN                    3.178349   \n19999983  2.162162e-01  1.000000e-07   0.610918                    4.874597   \n19999984  8.413413e-01  8.613614e-01   0.240591                    2.993040   \n19999985  2.162162e-01  4.459459e-01   0.059770                    2.865118   \n19999986           NaN           NaN        NaN                    3.271039   \n19999987           NaN           NaN        NaN                    4.138363   \n19999988  1.000000e-07  6.806807e-02   0.044410                    2.749617   \n19999989           NaN           NaN        NaN                    3.561204   \n19999990  1.000000e-07  6.806807e-02   0.044410                    2.686338   \n19999991  3.173173e-01  5.340340e-01   0.079210                    4.557244   \n19999992           NaN           NaN        NaN                    2.077618   \n19999993           NaN           NaN        NaN                    2.974397   \n19999994           NaN           NaN        NaN                    2.394917   \n19999995           NaN           NaN        NaN                    1.896169   \n19999996  5.585586e-01  5.150150e-01   0.286846                    1.978527   \n19999997  6.301301e-01  5.245245e-01   0.391830                    2.371618   \n19999998  9.544545e-01  9.713543e-01   0.217284                    3.215105   \n19999999  8.923924e-01  9.129129e-01   0.234969                    2.728368   \n\n          title_query_wmd_dis  query_word_num  title_word_num  \\\n0                    2.962266               5              15   \n1                    1.819170               5               4   \n2                    2.426266               5               9   \n3                    2.903956               5              11   \n4                    2.537336               5               9   \n5                    2.488552               5               8   \n6                    1.730939               5               5   \n7                    2.596473               4              21   \n8                    2.629477               4              18   \n9                    2.578096               4              18   \n10                   2.290192               4              15   \n11                   2.795375               4              16   \n12                   2.815629               4              17   \n13                   2.592353               4              13   \n14                   2.681484               3              12   \n15                   2.621820               3              13   \n16                   2.869328               3              19   \n17                   2.231126               6              13   \n18                   1.665654               6              12   \n19                   2.687106               6              19   \n20                   1.769321               4               9   \n21                   3.229184               4              15   \n22                   3.273366               4              19   \n23                   2.148116               6              14   \n24                   2.271073               6              11   \n25                   2.052032               6              19   \n26                   1.745707               6              10   \n27                   1.788092               6              14   \n28                   2.791334               6              22   \n29                   2.890891               6              19   \n...                       ...             ...             ...   \n19999970             2.980999               3              22   \n19999971             2.867128               3              17   \n19999972             2.782734               3              14   \n19999973             2.818681               3              14   \n19999974             2.398520               3               8   \n19999975             3.020941               3              17   \n19999976             2.705989               3              13   \n19999977             2.428524               3              13   \n19999978             2.437507               3              10   \n19999979             2.666055               3              18   \n19999980             3.057641               3              19   \n19999981             1.841433               3               6   \n19999982             2.551256               3              15   \n19999983             1.964968               3               8   \n19999984             2.705538               3              15   \n19999985             2.889474               3              19   \n19999986             2.645395               3              14   \n19999987             2.495341               3              12   \n19999988             2.525988               3              11   \n19999989             2.733928               3              13   \n19999990             2.708770               3              17   \n19999991             2.295096               3              12   \n19999992             3.122913               3              19   \n19999993             2.954713               3              15   \n19999994             2.933309               3              11   \n19999995             2.675375               3              14   \n19999996             3.241991               3              18   \n19999997             3.009820               4              15   \n19999998             2.631603               4              14   \n19999999             2.486051               4              13   \n\n          is_query_in_title  query_len  title_len    ...       10_lsi  \\\n0                         0         21         64    ...     0.012029   \n1                         0         21         15    ...     0.012029   \n2                         0         21         34    ...     0.012029   \n3                         0         21         47    ...     0.012029   \n4                         0         21         34    ...     0.012029   \n5                         0         21         37    ...     0.012029   \n6                         0         21         20    ...     0.012029   \n7                         0         17         83    ...     0.015682   \n8                         0         17         77    ...     0.015682   \n9                         0         17         76    ...     0.015682   \n10                        0         17         65    ...     0.015682   \n11                        0         19         69    ...    -0.005699   \n12                        0         19         71    ...    -0.005699   \n13                        0         19         58    ...    -0.005699   \n14                        0         15         53    ...    -0.000260   \n15                        0         15         66    ...    -0.000260   \n16                        0         15         78    ...    -0.000260   \n17                        0         27         59    ...    -0.003763   \n18                        0         27         55    ...    -0.003763   \n19                        0         27         72    ...    -0.003763   \n20                        0         12         32    ...     0.006122   \n21                        0         12         64    ...     0.006122   \n22                        0         12         75    ...     0.006122   \n23                        0         26         64    ...    -0.019510   \n24                        0         26         57    ...    -0.019510   \n25                        0         26         78    ...    -0.019510   \n26                        0         26         44    ...    -0.019510   \n27                        0         26         60    ...    -0.019510   \n28                        0         26         94    ...    -0.019510   \n29                        0         26         91    ...    -0.019510   \n...                     ...        ...        ...    ...          ...   \n19999970                  0         13         93    ...    -0.008214   \n19999971                  0         13         69    ...    -0.008214   \n19999972                  0         13         61    ...    -0.008214   \n19999973                  0         13         59    ...    -0.008214   \n19999974                  0         13         35    ...    -0.008214   \n19999975                  0         13         71    ...    -0.008214   \n19999976                  0         13         59    ...    -0.008214   \n19999977                  0         13         52    ...    -0.001539   \n19999978                  0         13         41    ...    -0.001539   \n19999979                  0         13         75    ...    -0.001539   \n19999980                  0         13         90    ...    -0.001539   \n19999981                  0         13         25    ...    -0.001539   \n19999982                  0         13         65    ...    -0.001539   \n19999983                  0         13         35    ...    -0.001539   \n19999984                  0         13         59    ...    -0.001539   \n19999985                  0         13         79    ...    -0.001539   \n19999986                  0         13         58    ...    -0.001539   \n19999987                  0         13         56    ...    -0.001539   \n19999988                  1         13         52    ...    -0.001539   \n19999989                  1         13         58    ...    -0.001539   \n19999990                  0         13         71    ...    -0.001539   \n19999991                  1         13         53    ...    -0.001539   \n19999992                  0         13         75    ...    -0.001539   \n19999993                  0         13         67    ...    -0.001539   \n19999994                  0         16         51    ...    -0.001456   \n19999995                  0         16         67    ...    -0.001456   \n19999996                  0         16         80    ...    -0.001456   \n19999997                  0         19         63    ...     0.002429   \n19999998                  0         19         58    ...     0.002429   \n19999999                  0         19         57    ...     0.002429   \n\n            11_lsi    12_lsi    13_lsi    14_lsi    15_lsi    16_lsi  \\\n0         0.052054  0.024495 -0.059344 -0.061288 -0.004410 -0.040278   \n1         0.052054  0.024495 -0.059344 -0.061288 -0.004410 -0.040278   \n2         0.052054  0.024495 -0.059344 -0.061288 -0.004410 -0.040278   \n3         0.052054  0.024495 -0.059344 -0.061288 -0.004410 -0.040278   \n4         0.052054  0.024495 -0.059344 -0.061288 -0.004410 -0.040278   \n5         0.052054  0.024495 -0.059344 -0.061288 -0.004410 -0.040278   \n6         0.052054  0.024495 -0.059344 -0.061288 -0.004410 -0.040278   \n7        -0.005775 -0.000173  0.008444  0.005047 -0.001640 -0.010071   \n8        -0.005775 -0.000173  0.008444  0.005047 -0.001640 -0.010071   \n9        -0.005775 -0.000173  0.008444  0.005047 -0.001640 -0.010071   \n10       -0.005775 -0.000173  0.008444  0.005047 -0.001640 -0.010071   \n11       -0.009435 -0.001577  0.001402 -0.006422  0.000884 -0.001094   \n12       -0.009435 -0.001577  0.001402 -0.006422  0.000884 -0.001094   \n13       -0.009435 -0.001577  0.001402 -0.006422  0.000884 -0.001094   \n14       -0.000277 -0.000152  0.000232 -0.000071  0.000067  0.000109   \n15       -0.000277 -0.000152  0.000232 -0.000071  0.000067  0.000109   \n16       -0.000277 -0.000152  0.000232 -0.000071  0.000067  0.000109   \n17       -0.003700 -0.004570  0.004670 -0.000729  0.001268  0.006215   \n18       -0.003700 -0.004570  0.004670 -0.000729  0.001268  0.006215   \n19       -0.003700 -0.004570  0.004670 -0.000729  0.001268  0.006215   \n20       -0.023177 -0.019614  0.006968  0.015620 -0.022984 -0.059825   \n21       -0.023177 -0.019614  0.006968  0.015620 -0.022984 -0.059825   \n22       -0.023177 -0.019614  0.006968  0.015620 -0.022984 -0.059825   \n23       -0.037576 -0.015826 -0.008397 -0.027774 -0.001002 -0.007665   \n24       -0.037576 -0.015826 -0.008397 -0.027774 -0.001002 -0.007665   \n25       -0.037576 -0.015826 -0.008397 -0.027774 -0.001002 -0.007665   \n26       -0.037576 -0.015826 -0.008397 -0.027774 -0.001002 -0.007665   \n27       -0.037576 -0.015826 -0.008397 -0.027774 -0.001002 -0.007665   \n28       -0.037576 -0.015826 -0.008397 -0.027774 -0.001002 -0.007665   \n29       -0.037576 -0.015826 -0.008397 -0.027774 -0.001002 -0.007665   \n...            ...       ...       ...       ...       ...       ...   \n19999970 -0.009903 -0.005569  0.000742 -0.003285 -0.003038  0.000579   \n19999971 -0.009903 -0.005569  0.000742 -0.003285 -0.003038  0.000579   \n19999972 -0.009903 -0.005569  0.000742 -0.003285 -0.003038  0.000579   \n19999973 -0.009903 -0.005569  0.000742 -0.003285 -0.003038  0.000579   \n19999974 -0.009903 -0.005569  0.000742 -0.003285 -0.003038  0.000579   \n19999975 -0.009903 -0.005569  0.000742 -0.003285 -0.003038  0.000579   \n19999976 -0.009903 -0.005569  0.000742 -0.003285 -0.003038  0.000579   \n19999977 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999978 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999979 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999980 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999981 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999982 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999983 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999984 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999985 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999986 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999987 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999988 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999989 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999990 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999991 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999992 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999993 -0.002263 -0.001413  0.003413  0.000706 -0.002660 -0.004498   \n19999994 -0.003631 -0.002238 -0.002284 -0.000340 -0.002185 -0.002078   \n19999995 -0.003631 -0.002238 -0.002284 -0.000340 -0.002185 -0.002078   \n19999996 -0.003631 -0.002238 -0.002284 -0.000340 -0.002185 -0.002078   \n19999997  0.012458  0.005203 -0.013807 -0.014367 -0.001159 -0.008744   \n19999998  0.012458  0.005203 -0.013807 -0.014367 -0.001159 -0.008744   \n19999999  0.012458  0.005203 -0.013807 -0.014367 -0.001159 -0.008744   \n\n            17_lsi    18_lsi    19_lsi  \n0        -0.003942 -0.026607 -0.046880  \n1        -0.003942 -0.026607 -0.046880  \n2        -0.003942 -0.026607 -0.046880  \n3        -0.003942 -0.026607 -0.046880  \n4        -0.003942 -0.026607 -0.046880  \n5        -0.003942 -0.026607 -0.046880  \n6        -0.003942 -0.026607 -0.046880  \n7         0.001998 -0.008235  0.005822  \n8         0.001998 -0.008235  0.005822  \n9         0.001998 -0.008235  0.005822  \n10        0.001998 -0.008235  0.005822  \n11        0.001143 -0.002296 -0.000518  \n12        0.001143 -0.002296 -0.000518  \n13        0.001143 -0.002296 -0.000518  \n14       -0.000050 -0.000118  0.000062  \n15       -0.000050 -0.000118  0.000062  \n16       -0.000050 -0.000118  0.000062  \n17        0.002262 -0.006971  0.006275  \n18        0.002262 -0.006971  0.006275  \n19        0.002262 -0.006971  0.006275  \n20        0.007075 -0.031380 -0.013899  \n21        0.007075 -0.031380 -0.013899  \n22        0.007075 -0.031380 -0.013899  \n23        0.011621 -0.019590 -0.010583  \n24        0.011621 -0.019590 -0.010583  \n25        0.011621 -0.019590 -0.010583  \n26        0.011621 -0.019590 -0.010583  \n27        0.011621 -0.019590 -0.010583  \n28        0.011621 -0.019590 -0.010583  \n29        0.011621 -0.019590 -0.010583  \n...            ...       ...       ...  \n19999970  0.001678 -0.008010 -0.001814  \n19999971  0.001678 -0.008010 -0.001814  \n19999972  0.001678 -0.008010 -0.001814  \n19999973  0.001678 -0.008010 -0.001814  \n19999974  0.001678 -0.008010 -0.001814  \n19999975  0.001678 -0.008010 -0.001814  \n19999976  0.001678 -0.008010 -0.001814  \n19999977 -0.001064  0.006256 -0.005480  \n19999978 -0.001064  0.006256 -0.005480  \n19999979 -0.001064  0.006256 -0.005480  \n19999980 -0.001064  0.006256 -0.005480  \n19999981 -0.001064  0.006256 -0.005480  \n19999982 -0.001064  0.006256 -0.005480  \n19999983 -0.001064  0.006256 -0.005480  \n19999984 -0.001064  0.006256 -0.005480  \n19999985 -0.001064  0.006256 -0.005480  \n19999986 -0.001064  0.006256 -0.005480  \n19999987 -0.001064  0.006256 -0.005480  \n19999988 -0.001064  0.006256 -0.005480  \n19999989 -0.001064  0.006256 -0.005480  \n19999990 -0.001064  0.006256 -0.005480  \n19999991 -0.001064  0.006256 -0.005480  \n19999992 -0.001064  0.006256 -0.005480  \n19999993 -0.001064  0.006256 -0.005480  \n19999994 -0.000240 -0.003171  0.000699  \n19999995 -0.000240 -0.003171  0.000699  \n19999996 -0.000240 -0.003171  0.000699  \n19999997 -0.000959 -0.004910 -0.007838  \n19999998 -0.000959 -0.004910 -0.007838  \n19999999 -0.000959 -0.004910 -0.007838  \n\n[20000000 rows x 56 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>title_sum</th>\n      <th>title_count</th>\n      <th>title_ctr</th>\n      <th>title_query_dot_similarity</th>\n      <th>title_query_wmd_dis</th>\n      <th>query_word_num</th>\n      <th>title_word_num</th>\n      <th>is_query_in_title</th>\n      <th>query_len</th>\n      <th>title_len</th>\n      <th>...</th>\n      <th>10_lsi</th>\n      <th>11_lsi</th>\n      <th>12_lsi</th>\n      <th>13_lsi</th>\n      <th>14_lsi</th>\n      <th>15_lsi</th>\n      <th>16_lsi</th>\n      <th>17_lsi</th>\n      <th>18_lsi</th>\n      <th>19_lsi</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2.467954</td>\n      <td>2.962266</td>\n      <td>5</td>\n      <td>15</td>\n      <td>0</td>\n      <td>21</td>\n      <td>64</td>\n      <td>...</td>\n      <td>0.012029</td>\n      <td>0.052054</td>\n      <td>0.024495</td>\n      <td>-0.059344</td>\n      <td>-0.061288</td>\n      <td>-0.004410</td>\n      <td>-0.040278</td>\n      <td>-0.003942</td>\n      <td>-0.026607</td>\n      <td>-0.046880</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>5.585586e-01</td>\n      <td>5.590591e-01</td>\n      <td>0.238788</td>\n      <td>3.494070</td>\n      <td>1.819170</td>\n      <td>5</td>\n      <td>4</td>\n      <td>0</td>\n      <td>21</td>\n      <td>15</td>\n      <td>...</td>\n      <td>0.012029</td>\n      <td>0.052054</td>\n      <td>0.024495</td>\n      <td>-0.059344</td>\n      <td>-0.061288</td>\n      <td>-0.004410</td>\n      <td>-0.040278</td>\n      <td>-0.003942</td>\n      <td>-0.026607</td>\n      <td>-0.046880</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2.162162e-01</td>\n      <td>1.826827e-01</td>\n      <td>0.192738</td>\n      <td>2.843601</td>\n      <td>2.426266</td>\n      <td>5</td>\n      <td>9</td>\n      <td>0</td>\n      <td>21</td>\n      <td>34</td>\n      <td>...</td>\n      <td>0.012029</td>\n      <td>0.052054</td>\n      <td>0.024495</td>\n      <td>-0.059344</td>\n      <td>-0.061288</td>\n      <td>-0.004410</td>\n      <td>-0.040278</td>\n      <td>-0.003942</td>\n      <td>-0.026607</td>\n      <td>-0.046880</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>8.588589e-01</td>\n      <td>7.862863e-01</td>\n      <td>0.445409</td>\n      <td>2.588134</td>\n      <td>2.903956</td>\n      <td>5</td>\n      <td>11</td>\n      <td>0</td>\n      <td>21</td>\n      <td>47</td>\n      <td>...</td>\n      <td>0.012029</td>\n      <td>0.052054</td>\n      <td>0.024495</td>\n      <td>-0.059344</td>\n      <td>-0.061288</td>\n      <td>-0.004410</td>\n      <td>-0.040278</td>\n      <td>-0.003942</td>\n      <td>-0.026607</td>\n      <td>-0.046880</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2.054980</td>\n      <td>2.537336</td>\n      <td>5</td>\n      <td>9</td>\n      <td>0</td>\n      <td>21</td>\n      <td>34</td>\n      <td>...</td>\n      <td>0.012029</td>\n      <td>0.052054</td>\n      <td>0.024495</td>\n      <td>-0.059344</td>\n      <td>-0.061288</td>\n      <td>-0.004410</td>\n      <td>-0.040278</td>\n      <td>-0.003942</td>\n      <td>-0.026607</td>\n      <td>-0.046880</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>1.000000e-07</td>\n      <td>6.806807e-02</td>\n      <td>0.044410</td>\n      <td>3.042946</td>\n      <td>2.488552</td>\n      <td>5</td>\n      <td>8</td>\n      <td>0</td>\n      <td>21</td>\n      <td>37</td>\n      <td>...</td>\n      <td>0.012029</td>\n      <td>0.052054</td>\n      <td>0.024495</td>\n      <td>-0.059344</td>\n      <td>-0.061288</td>\n      <td>-0.004410</td>\n      <td>-0.040278</td>\n      <td>-0.003942</td>\n      <td>-0.026607</td>\n      <td>-0.046880</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>5.855856e-01</td>\n      <td>4.819820e-01</td>\n      <td>0.372021</td>\n      <td>2.958290</td>\n      <td>1.730939</td>\n      <td>5</td>\n      <td>5</td>\n      <td>0</td>\n      <td>21</td>\n      <td>20</td>\n      <td>...</td>\n      <td>0.012029</td>\n      <td>0.052054</td>\n      <td>0.024495</td>\n      <td>-0.059344</td>\n      <td>-0.061288</td>\n      <td>-0.004410</td>\n      <td>-0.040278</td>\n      <td>-0.003942</td>\n      <td>-0.026607</td>\n      <td>-0.046880</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>7.507508e-01</td>\n      <td>6.861862e-01</td>\n      <td>0.361941</td>\n      <td>2.845859</td>\n      <td>2.596473</td>\n      <td>4</td>\n      <td>21</td>\n      <td>0</td>\n      <td>17</td>\n      <td>83</td>\n      <td>...</td>\n      <td>0.015682</td>\n      <td>-0.005775</td>\n      <td>-0.000173</td>\n      <td>0.008444</td>\n      <td>0.005047</td>\n      <td>-0.001640</td>\n      <td>-0.010071</td>\n      <td>0.001998</td>\n      <td>-0.008235</td>\n      <td>0.005822</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>8.523524e-01</td>\n      <td>8.303303e-01</td>\n      <td>0.326117</td>\n      <td>2.922247</td>\n      <td>2.629477</td>\n      <td>4</td>\n      <td>18</td>\n      <td>0</td>\n      <td>17</td>\n      <td>77</td>\n      <td>...</td>\n      <td>0.015682</td>\n      <td>-0.005775</td>\n      <td>-0.000173</td>\n      <td>0.008444</td>\n      <td>0.005047</td>\n      <td>-0.001640</td>\n      <td>-0.010071</td>\n      <td>0.001998</td>\n      <td>-0.008235</td>\n      <td>0.005822</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>3.120164</td>\n      <td>2.578096</td>\n      <td>4</td>\n      <td>18</td>\n      <td>0</td>\n      <td>17</td>\n      <td>76</td>\n      <td>...</td>\n      <td>0.015682</td>\n      <td>-0.005775</td>\n      <td>-0.000173</td>\n      <td>0.008444</td>\n      <td>0.005047</td>\n      <td>-0.001640</td>\n      <td>-0.010071</td>\n      <td>0.001998</td>\n      <td>-0.008235</td>\n      <td>0.005822</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>7.952953e-01</td>\n      <td>8.678679e-01</td>\n      <td>0.168810</td>\n      <td>4.774547</td>\n      <td>2.290192</td>\n      <td>4</td>\n      <td>15</td>\n      <td>0</td>\n      <td>17</td>\n      <td>65</td>\n      <td>...</td>\n      <td>0.015682</td>\n      <td>-0.005775</td>\n      <td>-0.000173</td>\n      <td>0.008444</td>\n      <td>0.005047</td>\n      <td>-0.001640</td>\n      <td>-0.010071</td>\n      <td>0.001998</td>\n      <td>-0.008235</td>\n      <td>0.005822</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>8.523524e-01</td>\n      <td>7.467467e-01</td>\n      <td>0.524060</td>\n      <td>2.029088</td>\n      <td>2.795375</td>\n      <td>4</td>\n      <td>16</td>\n      <td>0</td>\n      <td>19</td>\n      <td>69</td>\n      <td>...</td>\n      <td>-0.005699</td>\n      <td>-0.009435</td>\n      <td>-0.001577</td>\n      <td>0.001402</td>\n      <td>-0.006422</td>\n      <td>0.000884</td>\n      <td>-0.001094</td>\n      <td>0.001143</td>\n      <td>-0.002296</td>\n      <td>-0.000518</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>7.317317e-01</td>\n      <td>6.741742e-01</td>\n      <td>0.343601</td>\n      <td>2.032057</td>\n      <td>2.815629</td>\n      <td>4</td>\n      <td>17</td>\n      <td>0</td>\n      <td>19</td>\n      <td>71</td>\n      <td>...</td>\n      <td>-0.005699</td>\n      <td>-0.009435</td>\n      <td>-0.001577</td>\n      <td>0.001402</td>\n      <td>-0.006422</td>\n      <td>0.000884</td>\n      <td>-0.001094</td>\n      <td>0.001143</td>\n      <td>-0.002296</td>\n      <td>-0.000518</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>8.128128e-01</td>\n      <td>6.861862e-01</td>\n      <td>0.532255</td>\n      <td>1.857624</td>\n      <td>2.592353</td>\n      <td>4</td>\n      <td>13</td>\n      <td>0</td>\n      <td>19</td>\n      <td>58</td>\n      <td>...</td>\n      <td>-0.005699</td>\n      <td>-0.009435</td>\n      <td>-0.001577</td>\n      <td>0.001402</td>\n      <td>-0.006422</td>\n      <td>0.000884</td>\n      <td>-0.001094</td>\n      <td>0.001143</td>\n      <td>-0.002296</td>\n      <td>-0.000518</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>3.405394</td>\n      <td>2.681484</td>\n      <td>3</td>\n      <td>12</td>\n      <td>0</td>\n      <td>15</td>\n      <td>53</td>\n      <td>...</td>\n      <td>-0.000260</td>\n      <td>-0.000277</td>\n      <td>-0.000152</td>\n      <td>0.000232</td>\n      <td>-0.000071</td>\n      <td>0.000067</td>\n      <td>0.000109</td>\n      <td>-0.000050</td>\n      <td>-0.000118</td>\n      <td>0.000062</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>3.173173e-01</td>\n      <td>2.667668e-01</td>\n      <td>0.240432</td>\n      <td>4.849490</td>\n      <td>2.621820</td>\n      <td>3</td>\n      <td>13</td>\n      <td>0</td>\n      <td>15</td>\n      <td>66</td>\n      <td>...</td>\n      <td>-0.000260</td>\n      <td>-0.000277</td>\n      <td>-0.000152</td>\n      <td>0.000232</td>\n      <td>-0.000071</td>\n      <td>0.000067</td>\n      <td>0.000109</td>\n      <td>-0.000050</td>\n      <td>-0.000118</td>\n      <td>0.000062</td>\n    </tr>\n    <tr>\n      <th>16</th>\n      <td>3.173173e-01</td>\n      <td>6.806807e-02</td>\n      <td>0.747746</td>\n      <td>3.451626</td>\n      <td>2.869328</td>\n      <td>3</td>\n      <td>19</td>\n      <td>0</td>\n      <td>15</td>\n      <td>78</td>\n      <td>...</td>\n      <td>-0.000260</td>\n      <td>-0.000277</td>\n      <td>-0.000152</td>\n      <td>0.000232</td>\n      <td>-0.000071</td>\n      <td>0.000067</td>\n      <td>0.000109</td>\n      <td>-0.000050</td>\n      <td>-0.000118</td>\n      <td>0.000062</td>\n    </tr>\n    <tr>\n      <th>17</th>\n      <td>9.339339e-01</td>\n      <td>9.499499e-01</td>\n      <td>0.241851</td>\n      <td>8.938723</td>\n      <td>2.231126</td>\n      <td>6</td>\n      <td>13</td>\n      <td>0</td>\n      <td>27</td>\n      <td>59</td>\n      <td>...</td>\n      <td>-0.003763</td>\n      <td>-0.003700</td>\n      <td>-0.004570</td>\n      <td>0.004670</td>\n      <td>-0.000729</td>\n      <td>0.001268</td>\n      <td>0.006215</td>\n      <td>0.002262</td>\n      <td>-0.006971</td>\n      <td>0.006275</td>\n    </tr>\n    <tr>\n      <th>18</th>\n      <td>9.369369e-01</td>\n      <td>9.497497e-01</td>\n      <td>0.254081</td>\n      <td>9.772754</td>\n      <td>1.665654</td>\n      <td>6</td>\n      <td>12</td>\n      <td>0</td>\n      <td>27</td>\n      <td>55</td>\n      <td>...</td>\n      <td>-0.003763</td>\n      <td>-0.003700</td>\n      <td>-0.004570</td>\n      <td>0.004670</td>\n      <td>-0.000729</td>\n      <td>0.001268</td>\n      <td>0.006215</td>\n      <td>0.002262</td>\n      <td>-0.006971</td>\n      <td>0.006275</td>\n    </tr>\n    <tr>\n      <th>19</th>\n      <td>7.597598e-01</td>\n      <td>7.802803e-01</td>\n      <td>0.237216</td>\n      <td>5.266338</td>\n      <td>2.687106</td>\n      <td>6</td>\n      <td>19</td>\n      <td>0</td>\n      <td>27</td>\n      <td>72</td>\n      <td>...</td>\n      <td>-0.003763</td>\n      <td>-0.003700</td>\n      <td>-0.004570</td>\n      <td>0.004670</td>\n      <td>-0.000729</td>\n      <td>0.001268</td>\n      <td>0.006215</td>\n      <td>0.002262</td>\n      <td>-0.006971</td>\n      <td>0.006275</td>\n    </tr>\n    <tr>\n      <th>20</th>\n      <td>7.207207e-01</td>\n      <td>8.203203e-01</td>\n      <td>0.150934</td>\n      <td>1.828810</td>\n      <td>1.769321</td>\n      <td>4</td>\n      <td>9</td>\n      <td>0</td>\n      <td>12</td>\n      <td>32</td>\n      <td>...</td>\n      <td>0.006122</td>\n      <td>-0.023177</td>\n      <td>-0.019614</td>\n      <td>0.006968</td>\n      <td>0.015620</td>\n      <td>-0.022984</td>\n      <td>-0.059825</td>\n      <td>0.007075</td>\n      <td>-0.031380</td>\n      <td>-0.013899</td>\n    </tr>\n    <tr>\n      <th>21</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>0.830332</td>\n      <td>3.229184</td>\n      <td>4</td>\n      <td>15</td>\n      <td>0</td>\n      <td>12</td>\n      <td>64</td>\n      <td>...</td>\n      <td>0.006122</td>\n      <td>-0.023177</td>\n      <td>-0.019614</td>\n      <td>0.006968</td>\n      <td>0.015620</td>\n      <td>-0.022984</td>\n      <td>-0.059825</td>\n      <td>0.007075</td>\n      <td>-0.031380</td>\n      <td>-0.013899</td>\n    </tr>\n    <tr>\n      <th>22</th>\n      <td>8.128128e-01</td>\n      <td>7.842843e-01</td>\n      <td>0.323873</td>\n      <td>1.312244</td>\n      <td>3.273366</td>\n      <td>4</td>\n      <td>19</td>\n      <td>0</td>\n      <td>12</td>\n      <td>75</td>\n      <td>...</td>\n      <td>0.006122</td>\n      <td>-0.023177</td>\n      <td>-0.019614</td>\n      <td>0.006968</td>\n      <td>0.015620</td>\n      <td>-0.022984</td>\n      <td>-0.059825</td>\n      <td>0.007075</td>\n      <td>-0.031380</td>\n      <td>-0.013899</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>8.183183e-01</td>\n      <td>7.647648e-01</td>\n      <td>0.374164</td>\n      <td>3.183734</td>\n      <td>2.148116</td>\n      <td>6</td>\n      <td>14</td>\n      <td>0</td>\n      <td>26</td>\n      <td>64</td>\n      <td>...</td>\n      <td>-0.019510</td>\n      <td>-0.037576</td>\n      <td>-0.015826</td>\n      <td>-0.008397</td>\n      <td>-0.027774</td>\n      <td>-0.001002</td>\n      <td>-0.007665</td>\n      <td>0.011621</td>\n      <td>-0.019590</td>\n      <td>-0.010583</td>\n    </tr>\n    <tr>\n      <th>24</th>\n      <td>8.878879e-01</td>\n      <td>8.613614e-01</td>\n      <td>0.353391</td>\n      <td>4.587874</td>\n      <td>2.271073</td>\n      <td>6</td>\n      <td>11</td>\n      <td>0</td>\n      <td>26</td>\n      <td>57</td>\n      <td>...</td>\n      <td>-0.019510</td>\n      <td>-0.037576</td>\n      <td>-0.015826</td>\n      <td>-0.008397</td>\n      <td>-0.027774</td>\n      <td>-0.001002</td>\n      <td>-0.007665</td>\n      <td>0.011621</td>\n      <td>-0.019590</td>\n      <td>-0.010583</td>\n    </tr>\n    <tr>\n      <th>25</th>\n      <td>7.507508e-01</td>\n      <td>5.150150e-01</td>\n      <td>0.769868</td>\n      <td>3.947191</td>\n      <td>2.052032</td>\n      <td>6</td>\n      <td>19</td>\n      <td>0</td>\n      <td>26</td>\n      <td>78</td>\n      <td>...</td>\n      <td>-0.019510</td>\n      <td>-0.037576</td>\n      <td>-0.015826</td>\n      <td>-0.008397</td>\n      <td>-0.027774</td>\n      <td>-0.001002</td>\n      <td>-0.007665</td>\n      <td>0.011621</td>\n      <td>-0.019590</td>\n      <td>-0.010583</td>\n    </tr>\n    <tr>\n      <th>26</th>\n      <td>8.648649e-01</td>\n      <td>8.043043e-01</td>\n      <td>0.424667</td>\n      <td>3.941643</td>\n      <td>1.745707</td>\n      <td>6</td>\n      <td>10</td>\n      <td>0</td>\n      <td>26</td>\n      <td>44</td>\n      <td>...</td>\n      <td>-0.019510</td>\n      <td>-0.037576</td>\n      <td>-0.015826</td>\n      <td>-0.008397</td>\n      <td>-0.027774</td>\n      <td>-0.001002</td>\n      <td>-0.007665</td>\n      <td>0.011621</td>\n      <td>-0.019590</td>\n      <td>-0.010583</td>\n    </tr>\n    <tr>\n      <th>27</th>\n      <td>9.514515e-01</td>\n      <td>9.503504e-01</td>\n      <td>0.311929</td>\n      <td>4.658513</td>\n      <td>1.788092</td>\n      <td>6</td>\n      <td>14</td>\n      <td>0</td>\n      <td>26</td>\n      <td>60</td>\n      <td>...</td>\n      <td>-0.019510</td>\n      <td>-0.037576</td>\n      <td>-0.015826</td>\n      <td>-0.008397</td>\n      <td>-0.027774</td>\n      <td>-0.001002</td>\n      <td>-0.007665</td>\n      <td>0.011621</td>\n      <td>-0.019590</td>\n      <td>-0.010583</td>\n    </tr>\n    <tr>\n      <th>28</th>\n      <td>7.082082e-01</td>\n      <td>6.311311e-01</td>\n      <td>0.370347</td>\n      <td>3.669347</td>\n      <td>2.791334</td>\n      <td>6</td>\n      <td>22</td>\n      <td>0</td>\n      <td>26</td>\n      <td>94</td>\n      <td>...</td>\n      <td>-0.019510</td>\n      <td>-0.037576</td>\n      <td>-0.015826</td>\n      <td>-0.008397</td>\n      <td>-0.027774</td>\n      <td>-0.001002</td>\n      <td>-0.007665</td>\n      <td>0.011621</td>\n      <td>-0.019590</td>\n      <td>-0.010583</td>\n    </tr>\n    <tr>\n      <th>29</th>\n      <td>8.278278e-01</td>\n      <td>7.962963e-01</td>\n      <td>0.335255</td>\n      <td>4.505778</td>\n      <td>2.890891</td>\n      <td>6</td>\n      <td>19</td>\n      <td>0</td>\n      <td>26</td>\n      <td>91</td>\n      <td>...</td>\n      <td>-0.019510</td>\n      <td>-0.037576</td>\n      <td>-0.015826</td>\n      <td>-0.008397</td>\n      <td>-0.027774</td>\n      <td>-0.001002</td>\n      <td>-0.007665</td>\n      <td>0.011621</td>\n      <td>-0.019590</td>\n      <td>-0.010583</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>19999970</th>\n      <td>9.359359e-01</td>\n      <td>8.958959e-01</td>\n      <td>0.458500</td>\n      <td>2.598250</td>\n      <td>2.980999</td>\n      <td>3</td>\n      <td>22</td>\n      <td>0</td>\n      <td>13</td>\n      <td>93</td>\n      <td>...</td>\n      <td>-0.008214</td>\n      <td>-0.009903</td>\n      <td>-0.005569</td>\n      <td>0.000742</td>\n      <td>-0.003285</td>\n      <td>-0.003038</td>\n      <td>0.000579</td>\n      <td>0.001678</td>\n      <td>-0.008010</td>\n      <td>-0.001814</td>\n    </tr>\n    <tr>\n      <th>19999971</th>\n      <td>7.597598e-01</td>\n      <td>6.366366e-01</td>\n      <td>0.480988</td>\n      <td>2.529526</td>\n      <td>2.867128</td>\n      <td>3</td>\n      <td>17</td>\n      <td>0</td>\n      <td>13</td>\n      <td>69</td>\n      <td>...</td>\n      <td>-0.008214</td>\n      <td>-0.009903</td>\n      <td>-0.005569</td>\n      <td>0.000742</td>\n      <td>-0.003285</td>\n      <td>-0.003038</td>\n      <td>0.000579</td>\n      <td>0.001678</td>\n      <td>-0.008010</td>\n      <td>-0.001814</td>\n    </tr>\n    <tr>\n      <th>19999972</th>\n      <td>2.162162e-01</td>\n      <td>1.826827e-01</td>\n      <td>0.192738</td>\n      <td>3.740707</td>\n      <td>2.782734</td>\n      <td>3</td>\n      <td>14</td>\n      <td>0</td>\n      <td>13</td>\n      <td>61</td>\n      <td>...</td>\n      <td>-0.008214</td>\n      <td>-0.009903</td>\n      <td>-0.005569</td>\n      <td>0.000742</td>\n      <td>-0.003285</td>\n      <td>-0.003038</td>\n      <td>0.000579</td>\n      <td>0.001678</td>\n      <td>-0.008010</td>\n      <td>-0.001814</td>\n    </tr>\n    <tr>\n      <th>19999973</th>\n      <td>3.173173e-01</td>\n      <td>3.133133e-01</td>\n      <td>0.196087</td>\n      <td>3.287300</td>\n      <td>2.818681</td>\n      <td>3</td>\n      <td>14</td>\n      <td>0</td>\n      <td>13</td>\n      <td>59</td>\n      <td>...</td>\n      <td>-0.008214</td>\n      <td>-0.009903</td>\n      <td>-0.005569</td>\n      <td>0.000742</td>\n      <td>-0.003285</td>\n      <td>-0.003038</td>\n      <td>0.000579</td>\n      <td>0.001678</td>\n      <td>-0.008010</td>\n      <td>-0.001814</td>\n    </tr>\n    <tr>\n      <th>19999974</th>\n      <td>2.162162e-01</td>\n      <td>3.523524e-01</td>\n      <td>0.087692</td>\n      <td>4.147095</td>\n      <td>2.398520</td>\n      <td>3</td>\n      <td>8</td>\n      <td>0</td>\n      <td>13</td>\n      <td>35</td>\n      <td>...</td>\n      <td>-0.008214</td>\n      <td>-0.009903</td>\n      <td>-0.005569</td>\n      <td>0.000742</td>\n      <td>-0.003285</td>\n      <td>-0.003038</td>\n      <td>0.000579</td>\n      <td>0.001678</td>\n      <td>-0.008010</td>\n      <td>-0.001814</td>\n    </tr>\n    <tr>\n      <th>19999975</th>\n      <td>5.265265e-01</td>\n      <td>6.021021e-01</td>\n      <td>0.170917</td>\n      <td>2.822454</td>\n      <td>3.020941</td>\n      <td>3</td>\n      <td>17</td>\n      <td>0</td>\n      <td>13</td>\n      <td>71</td>\n      <td>...</td>\n      <td>-0.008214</td>\n      <td>-0.009903</td>\n      <td>-0.005569</td>\n      <td>0.000742</td>\n      <td>-0.003285</td>\n      <td>-0.003038</td>\n      <td>0.000579</td>\n      <td>0.001678</td>\n      <td>-0.008010</td>\n      <td>-0.001814</td>\n    </tr>\n    <tr>\n      <th>19999976</th>\n      <td>5.265265e-01</td>\n      <td>4.029029e-01</td>\n      <td>0.386673</td>\n      <td>3.907083</td>\n      <td>2.705989</td>\n      <td>3</td>\n      <td>13</td>\n      <td>0</td>\n      <td>13</td>\n      <td>59</td>\n      <td>...</td>\n      <td>-0.008214</td>\n      <td>-0.009903</td>\n      <td>-0.005569</td>\n      <td>0.000742</td>\n      <td>-0.003285</td>\n      <td>-0.003038</td>\n      <td>0.000579</td>\n      <td>0.001678</td>\n      <td>-0.008010</td>\n      <td>-0.001814</td>\n    </tr>\n    <tr>\n      <th>19999977</th>\n      <td>6.946947e-01</td>\n      <td>7.597598e-01</td>\n      <td>0.186255</td>\n      <td>3.991168</td>\n      <td>2.428524</td>\n      <td>3</td>\n      <td>13</td>\n      <td>0</td>\n      <td>13</td>\n      <td>52</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999978</th>\n      <td>4.439439e-01</td>\n      <td>4.704705e-01</td>\n      <td>0.197964</td>\n      <td>3.470595</td>\n      <td>2.437507</td>\n      <td>3</td>\n      <td>10</td>\n      <td>0</td>\n      <td>13</td>\n      <td>41</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999979</th>\n      <td>6.091091e-01</td>\n      <td>5.895896e-01</td>\n      <td>0.269661</td>\n      <td>3.878566</td>\n      <td>2.666055</td>\n      <td>3</td>\n      <td>18</td>\n      <td>0</td>\n      <td>13</td>\n      <td>75</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999980</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>3.119522</td>\n      <td>3.057641</td>\n      <td>3</td>\n      <td>19</td>\n      <td>0</td>\n      <td>13</td>\n      <td>90</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999981</th>\n      <td>2.162162e-01</td>\n      <td>1.481481e-01</td>\n      <td>0.232531</td>\n      <td>4.370203</td>\n      <td>1.841433</td>\n      <td>3</td>\n      <td>6</td>\n      <td>0</td>\n      <td>13</td>\n      <td>25</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999982</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>3.178349</td>\n      <td>2.551256</td>\n      <td>3</td>\n      <td>15</td>\n      <td>0</td>\n      <td>13</td>\n      <td>65</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999983</th>\n      <td>2.162162e-01</td>\n      <td>1.000000e-07</td>\n      <td>0.610918</td>\n      <td>4.874597</td>\n      <td>1.964968</td>\n      <td>3</td>\n      <td>8</td>\n      <td>0</td>\n      <td>13</td>\n      <td>35</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999984</th>\n      <td>8.413413e-01</td>\n      <td>8.613614e-01</td>\n      <td>0.240591</td>\n      <td>2.993040</td>\n      <td>2.705538</td>\n      <td>3</td>\n      <td>15</td>\n      <td>0</td>\n      <td>13</td>\n      <td>59</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999985</th>\n      <td>2.162162e-01</td>\n      <td>4.459459e-01</td>\n      <td>0.059770</td>\n      <td>2.865118</td>\n      <td>2.889474</td>\n      <td>3</td>\n      <td>19</td>\n      <td>0</td>\n      <td>13</td>\n      <td>79</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999986</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>3.271039</td>\n      <td>2.645395</td>\n      <td>3</td>\n      <td>14</td>\n      <td>0</td>\n      <td>13</td>\n      <td>58</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999987</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>4.138363</td>\n      <td>2.495341</td>\n      <td>3</td>\n      <td>12</td>\n      <td>0</td>\n      <td>13</td>\n      <td>56</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999988</th>\n      <td>1.000000e-07</td>\n      <td>6.806807e-02</td>\n      <td>0.044410</td>\n      <td>2.749617</td>\n      <td>2.525988</td>\n      <td>3</td>\n      <td>11</td>\n      <td>1</td>\n      <td>13</td>\n      <td>52</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999989</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>3.561204</td>\n      <td>2.733928</td>\n      <td>3</td>\n      <td>13</td>\n      <td>1</td>\n      <td>13</td>\n      <td>58</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999990</th>\n      <td>1.000000e-07</td>\n      <td>6.806807e-02</td>\n      <td>0.044410</td>\n      <td>2.686338</td>\n      <td>2.708770</td>\n      <td>3</td>\n      <td>17</td>\n      <td>0</td>\n      <td>13</td>\n      <td>71</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999991</th>\n      <td>3.173173e-01</td>\n      <td>5.340340e-01</td>\n      <td>0.079210</td>\n      <td>4.557244</td>\n      <td>2.295096</td>\n      <td>3</td>\n      <td>12</td>\n      <td>1</td>\n      <td>13</td>\n      <td>53</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999992</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2.077618</td>\n      <td>3.122913</td>\n      <td>3</td>\n      <td>19</td>\n      <td>0</td>\n      <td>13</td>\n      <td>75</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999993</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2.974397</td>\n      <td>2.954713</td>\n      <td>3</td>\n      <td>15</td>\n      <td>0</td>\n      <td>13</td>\n      <td>67</td>\n      <td>...</td>\n      <td>-0.001539</td>\n      <td>-0.002263</td>\n      <td>-0.001413</td>\n      <td>0.003413</td>\n      <td>0.000706</td>\n      <td>-0.002660</td>\n      <td>-0.004498</td>\n      <td>-0.001064</td>\n      <td>0.006256</td>\n      <td>-0.005480</td>\n    </tr>\n    <tr>\n      <th>19999994</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2.394917</td>\n      <td>2.933309</td>\n      <td>3</td>\n      <td>11</td>\n      <td>0</td>\n      <td>16</td>\n      <td>51</td>\n      <td>...</td>\n      <td>-0.001456</td>\n      <td>-0.003631</td>\n      <td>-0.002238</td>\n      <td>-0.002284</td>\n      <td>-0.000340</td>\n      <td>-0.002185</td>\n      <td>-0.002078</td>\n      <td>-0.000240</td>\n      <td>-0.003171</td>\n      <td>0.000699</td>\n    </tr>\n    <tr>\n      <th>19999995</th>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>1.896169</td>\n      <td>2.675375</td>\n      <td>3</td>\n      <td>14</td>\n      <td>0</td>\n      <td>16</td>\n      <td>67</td>\n      <td>...</td>\n      <td>-0.001456</td>\n      <td>-0.003631</td>\n      <td>-0.002238</td>\n      <td>-0.002284</td>\n      <td>-0.000340</td>\n      <td>-0.002185</td>\n      <td>-0.002078</td>\n      <td>-0.000240</td>\n      <td>-0.003171</td>\n      <td>0.000699</td>\n    </tr>\n    <tr>\n      <th>19999996</th>\n      <td>5.585586e-01</td>\n      <td>5.150150e-01</td>\n      <td>0.286846</td>\n      <td>1.978527</td>\n      <td>3.241991</td>\n      <td>3</td>\n      <td>18</td>\n      <td>0</td>\n      <td>16</td>\n      <td>80</td>\n      <td>...</td>\n      <td>-0.001456</td>\n      <td>-0.003631</td>\n      <td>-0.002238</td>\n      <td>-0.002284</td>\n      <td>-0.000340</td>\n      <td>-0.002185</td>\n      <td>-0.002078</td>\n      <td>-0.000240</td>\n      <td>-0.003171</td>\n      <td>0.000699</td>\n    </tr>\n    <tr>\n      <th>19999997</th>\n      <td>6.301301e-01</td>\n      <td>5.245245e-01</td>\n      <td>0.391830</td>\n      <td>2.371618</td>\n      <td>3.009820</td>\n      <td>4</td>\n      <td>15</td>\n      <td>0</td>\n      <td>19</td>\n      <td>63</td>\n      <td>...</td>\n      <td>0.002429</td>\n      <td>0.012458</td>\n      <td>0.005203</td>\n      <td>-0.013807</td>\n      <td>-0.014367</td>\n      <td>-0.001159</td>\n      <td>-0.008744</td>\n      <td>-0.000959</td>\n      <td>-0.004910</td>\n      <td>-0.007838</td>\n    </tr>\n    <tr>\n      <th>19999998</th>\n      <td>9.544545e-01</td>\n      <td>9.713543e-01</td>\n      <td>0.217284</td>\n      <td>3.215105</td>\n      <td>2.631603</td>\n      <td>4</td>\n      <td>14</td>\n      <td>0</td>\n      <td>19</td>\n      <td>58</td>\n      <td>...</td>\n      <td>0.002429</td>\n      <td>0.012458</td>\n      <td>0.005203</td>\n      <td>-0.013807</td>\n      <td>-0.014367</td>\n      <td>-0.001159</td>\n      <td>-0.008744</td>\n      <td>-0.000959</td>\n      <td>-0.004910</td>\n      <td>-0.007838</td>\n    </tr>\n    <tr>\n      <th>19999999</th>\n      <td>8.923924e-01</td>\n      <td>9.129129e-01</td>\n      <td>0.234969</td>\n      <td>2.728368</td>\n      <td>2.486051</td>\n      <td>4</td>\n      <td>13</td>\n      <td>0</td>\n      <td>19</td>\n      <td>57</td>\n      <td>...</td>\n      <td>0.002429</td>\n      <td>0.012458</td>\n      <td>0.005203</td>\n      <td>-0.013807</td>\n      <td>-0.014367</td>\n      <td>-0.001159</td>\n      <td>-0.008744</td>\n      <td>-0.000959</td>\n      <td>-0.004910</td>\n      <td>-0.007838</td>\n    </tr>\n  </tbody>\n</table>\n<p>20000000 rows × 56 columns</p>\n</div>"},"execution_count":17}],"source":"import pandas as pd\ntest_data=pd.read_pickle(\"/home/kesci/work/first_zzp/test_all_fea.pickle\")\nnorm(test_data,['title_count','title_sum','title_nunique_prefix'])\ntest_data","execution_count":17},{"metadata":{"id":"57AF1312CCD342898EF9A238258A8258","collapsed":true,"scrolled":true},"cell_type":"code","outputs":[{"output_type":"stream","text":"1\n2.0564005653063457\n2\n2.1260224620501202\n3\n2.0961411158243815\n4\n2.452286008993785\n5\n2.2341359734535216\n6\n2.131627678871155\n7\n2.107110846042633\n8\n1.8496156533559163\n9\n1.8544540484746297\n10\n1.8585427522659301\n","name":"stdout"}],"source":"result_list=[]\nfrom time import time\nfor i in range(10):############因为我把最终一亿的测试集分块了，此处是分块读取test集，最后concat\n    print(i+1)\n    time1=time()\n    test_data=pd.read_pickle(\"/home/kesci/test_final_fea/part\"+str(i+1)+\".pickle\")\n    y_pred = gbm.predict(test_data[fea], num_iteration=700)\n    y_pred_df=pd.DataFrame(y_pred)\n    result_list.append(y_pred_df)\n    time2=time()\n    print((time2-time1)/60)","execution_count":5},{"metadata":{"id":"7B9DF4308CC1412B857D942DA528A4B5","collapsed":true,"scrolled":true},"cell_type":"code","outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"                0\n0        0.151121\n1        0.149615\n2        0.331942\n3        0.185651\n4        0.313227\n5        0.137957\n6        0.151461\n7        0.352418\n8        0.412920\n9        0.229125\n10       0.422423\n11       0.343675\n12       0.132137\n13       0.315063\n14       0.365872\n15       0.304227\n16       0.500563\n17       0.358627\n18       0.110453\n19       0.230809\n20       0.142595\n21       0.214993\n22       0.164248\n23       0.101982\n24       0.241527\n25       0.318662\n26       0.308996\n27       0.247026\n28       0.112649\n29       0.180954\n...           ...\n9999970  0.307133\n9999971  0.355174\n9999972  0.373904\n9999973  0.234655\n9999974  0.253388\n9999975  0.166741\n9999976  0.297376\n9999977  0.320385\n9999978  0.380277\n9999979  0.368060\n9999980  0.119370\n9999981  0.345374\n9999982  0.177132\n9999983  0.288262\n9999984  0.118421\n9999985  0.104061\n9999986  0.266571\n9999987  0.140014\n9999988  0.088411\n9999989  0.097909\n9999990  0.154874\n9999991  0.092670\n9999992  0.095372\n9999993  0.093918\n9999994  0.101368\n9999995  0.181028\n9999996  0.081480\n9999997  0.089809\n9999998  0.067667\n9999999  0.105153\n\n[100000000 rows x 1 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0.151121</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>0.149615</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>0.331942</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>0.185651</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>0.313227</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>0.137957</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>0.151461</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>0.352418</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>0.412920</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>0.229125</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>0.422423</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>0.343675</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>0.132137</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>0.315063</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>0.365872</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>0.304227</td>\n    </tr>\n    <tr>\n      <th>16</th>\n      <td>0.500563</td>\n    </tr>\n    <tr>\n      <th>17</th>\n      <td>0.358627</td>\n    </tr>\n    <tr>\n      <th>18</th>\n      <td>0.110453</td>\n    </tr>\n    <tr>\n      <th>19</th>\n      <td>0.230809</td>\n    </tr>\n    <tr>\n      <th>20</th>\n      <td>0.142595</td>\n    </tr>\n    <tr>\n      <th>21</th>\n      <td>0.214993</td>\n    </tr>\n    <tr>\n      <th>22</th>\n      <td>0.164248</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>0.101982</td>\n    </tr>\n    <tr>\n      <th>24</th>\n      <td>0.241527</td>\n    </tr>\n    <tr>\n      <th>25</th>\n      <td>0.318662</td>\n    </tr>\n    <tr>\n      <th>26</th>\n      <td>0.308996</td>\n    </tr>\n    <tr>\n      <th>27</th>\n      <td>0.247026</td>\n    </tr>\n    <tr>\n      <th>28</th>\n      <td>0.112649</td>\n    </tr>\n    <tr>\n      <th>29</th>\n      <td>0.180954</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>9999970</th>\n      <td>0.307133</td>\n    </tr>\n    <tr>\n      <th>9999971</th>\n      <td>0.355174</td>\n    </tr>\n    <tr>\n      <th>9999972</th>\n      <td>0.373904</td>\n    </tr>\n    <tr>\n      <th>9999973</th>\n      <td>0.234655</td>\n    </tr>\n    <tr>\n      <th>9999974</th>\n      <td>0.253388</td>\n    </tr>\n    <tr>\n      <th>9999975</th>\n      <td>0.166741</td>\n    </tr>\n    <tr>\n      <th>9999976</th>\n      <td>0.297376</td>\n    </tr>\n    <tr>\n      <th>9999977</th>\n      <td>0.320385</td>\n    </tr>\n    <tr>\n      <th>9999978</th>\n      <td>0.380277</td>\n    </tr>\n    <tr>\n      <th>9999979</th>\n      <td>0.368060</td>\n    </tr>\n    <tr>\n      <th>9999980</th>\n      <td>0.119370</td>\n    </tr>\n    <tr>\n      <th>9999981</th>\n      <td>0.345374</td>\n    </tr>\n    <tr>\n      <th>9999982</th>\n      <td>0.177132</td>\n    </tr>\n    <tr>\n      <th>9999983</th>\n      <td>0.288262</td>\n    </tr>\n    <tr>\n      <th>9999984</th>\n      <td>0.118421</td>\n    </tr>\n    <tr>\n      <th>9999985</th>\n      <td>0.104061</td>\n    </tr>\n    <tr>\n      <th>9999986</th>\n      <td>0.266571</td>\n    </tr>\n    <tr>\n      <th>9999987</th>\n      <td>0.140014</td>\n    </tr>\n    <tr>\n      <th>9999988</th>\n      <td>0.088411</td>\n    </tr>\n    <tr>\n      <th>9999989</th>\n      <td>0.097909</td>\n    </tr>\n    <tr>\n      <th>9999990</th>\n      <td>0.154874</td>\n    </tr>\n    <tr>\n      <th>9999991</th>\n      <td>0.092670</td>\n    </tr>\n    <tr>\n      <th>9999992</th>\n      <td>0.095372</td>\n    </tr>\n    <tr>\n      <th>9999993</th>\n      <td>0.093918</td>\n    </tr>\n    <tr>\n      <th>9999994</th>\n      <td>0.101368</td>\n    </tr>\n    <tr>\n      <th>9999995</th>\n      <td>0.181028</td>\n    </tr>\n    <tr>\n      <th>9999996</th>\n      <td>0.081480</td>\n    </tr>\n    <tr>\n      <th>9999997</th>\n      <td>0.089809</td>\n    </tr>\n    <tr>\n      <th>9999998</th>\n      <td>0.067667</td>\n    </tr>\n    <tr>\n      <th>9999999</th>\n      <td>0.105153</td>\n    </tr>\n  </tbody>\n</table>\n<p>100000000 rows × 1 columns</p>\n</div>"},"execution_count":6}],"source":"result_all_test_final=pd.concat(result_list,axis=0)\nresult_all_test_final","execution_count":6},{"metadata":{"id":"0EFB35197CB64A18B84D03EF6D486B8F","collapsed":false,"scrolled":false},"cell_type":"code","outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"0    0.196639\ndtype: float64"},"execution_count":7}],"source":"result_all_test_final.mean()","execution_count":7},{"metadata":{"id":"8773587242B64E9AB5BB2CE1CA892BA8","mdEditEnable":false},"cell_type":"markdown","source":"# 下面是不分快"},{"metadata":{"id":"38CE32DC31EE461486EC2046CB3B9A54","collapsed":false,"scrolled":false},"cell_type":"code","outputs":[],"source":"# test_data=test_data[['title_ctr','title_sum','title_count','title_nunique_prefix']]\n# fea=list(test_data.columns)\ny_pred = gbm.predict(test_data[fea], num_iteration=gbm.best_iteration)\ny_pred_df=pd.DataFrame(y_pred)","execution_count":18},{"metadata":{"id":"8E8BE96BD64D4D558539E316CA330F41","collapsed":false,"scrolled":false},"cell_type":"code","outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"0    0.19933\ndtype: float64"},"execution_count":19}],"source":"y_pred_df.mean()","execution_count":19},{"metadata":{"id":"2FD2754BC80C4C3A82DA8AD86B840E53","mdEditEnable":false},"cell_type":"markdown","source":"# 多进程读取测试集，取其中的query_id和title_id两列，用来做最终的submission"},{"metadata":{"id":"C575C362A235405783666129123C48FE","collapsed":false,"scrolled":true},"cell_type":"code","outputs":[{"output_type":"stream","text":"0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n24.824506759643555\n0.0001704692840576172\n","name":"stdout"}],"source":"# train_data_ctr=pd.read_csv(data_store_path+\"train_data_ctr.csv\")#读已经做好ctr特征的训练集\n# test_data_ctr=pd.read_csv(data_store_path+\"test_data_ctr.csv\")#读已经做好ctr特征的测试集\n# train_data=pd.read_csv(data_store_path+\"train_data.csv\")#读训练集（已经用shuffle分好了的）\n# train_data=pd.concat([train_data,train_data_ctr],axis=1)\n# test_data=pd.concat([test_data,test_data_ctr],axis=1)\n# del train_data_ctr\n# del test_data_ctr\n# train_data\n##########################################################################################\n# train_data=pd.read_csv(data_store_path+\"train_feature_first_no_query_and_title_data.csv\")#读已经做好ctr特征的训练集\n# test_data=pd.read_csv(data_store_path+\"test_feature_first_no_query_and_title_data.csv\")#读已经做好ctr特征的测试集\n###################################################下面这个是为了去做 相似度特征\n# train_data=pd.read_csv(data_store_path+\"train_feature_first_data.csv\")#读已经做好ctr特征的训练集\n# test_data=pd.read_csv(data_store_path+\"test_feature_first_data.csv\")#读已经做好ctr特征的测试集\n\n############################################\n# train_data=pd.read_csv(data_store_path+\"data_sets/train_data_ctr.csv\")#读已经做好ctr特征的训练集\n# test_data=pd.read_csv(data_store_path+\"data_sets/test_data_ctr.csv\")#读已经做好ctr特征的测试集\n\n# import h5py\n# f = h5py.File(data_store_path+\"train_data.hd5\",'r')   #打开h5文件\n# print(f.keys)\n# a = f['train']                    #取出主键为data的所有的键值\n# f.close()\n# print(a)\n\ndata_r_path='/home/kesci/input/bytedance/bytedance_contest.final_2.csv'\ncpu_num=16\nstart=0\nall_data_num=20000000\nnames_out=['query_id','query','query_title_id','title']\nusecols_out=['query_id','query_title_id']\nhas_head=False\nif has_head==False:\n    import multiprocessing\n    from time import time\n    import pandas as pd\n    all_data=[]\n    #test_final_part1.csv\n    def data_read(start,single_data_num,data_real_path):\n        data_out=pd.read_csv(data_real_path,header=None,names=names_out,usecols=usecols_out,skiprows=start,nrows=single_data_num)\n        return data_out\n    time1=time()\n    pool = multiprocessing.Pool(processes=cpu_num)\n    \n    for epoch in range(int(cpu_num)):\n        print(epoch)\n        single_data_num=int(all_data_num/cpu_num)\n        all_data.append(pool.apply_async(data_read, [start+single_data_num*epoch,single_data_num,data_r_path]))\n    # single_data_num=20000000/16\n    # all_data_num=20000000\n    # for epoch in range(int(all_data_num/single_data_num)):\n    #     all_data.append(pool.apply_async(data_read, [single_data_num*epoch,single_data_num,data_path+\"test_final_part1.csv\"]))\n    pool.close()\n    pool.join()\n    time2=time()\n    print(time2-time1)\n    all_data_pro=[single.get() for single in all_data]\n    time3=time()\n    print(time3-time2)\n    all_data_pro=pd.concat(all_data_pro)\n    test_data_pred_need=all_data_pro.reset_index(drop=True)\n\n\n# result=pd.concat([test_data_pred_need,y_pred_df],axis=1)\n# result.to_csv(\"first_zzp/result/sub_lgb_1200epoch.csv\",header=None,index=None)#####合成三列做 最终的提交结果\n","execution_count":22},{"metadata":{"id":"65EA3CA5888741BEA42DD3DA0E5E9AC6","collapsed":false,"scrolled":false},"cell_type":"code","outputs":[{"output_type":"stream","text":"wget: /opt/conda/lib/libcrypto.so.1.0.0: no version information available (required by wget)\nwget: /opt/conda/lib/libssl.so.1.0.0: no version information available (required by wget)\nwget: /opt/conda/lib/libssl.so.1.0.0: no version information available (required by wget)\n--2019-08-10 16:58:05--  https://www.heywhale.com/kesci_submit\nResolving www.heywhale.com (www.heywhale.com)... 106.15.25.147\nConnecting to www.heywhale.com (www.heywhale.com)|106.15.25.147|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 6709558 (6.4M) [application/octet-stream]\nSaving to: ‘kesci_submit’\n\nkesci_submit        100%[===================>]   6.40M  21.1MB/s    in 0.3s    \n\n2019-08-10 16:58:05 (21.1 MB/s) - ‘kesci_submit’ saved [6709558/6709558]\n\nKesci Submit Tool 3.2.1\n\n> 已验证Token\n> 提交文件 /home/kesci/work/first_zzp/result/sub_lgb_1200epoch.csv (575321.68 KiB)\n> 已上传 100 %\n> 文件已上传        \n> 服务器响应: 200 提交成功，请等待评审完成\n> 提交完成\n","name":"stdout"}],"source":"!wget -O kesci_submit https://www.heywhale.com/kesci_submit&&chmod +x kesci_submit\n!https_proxy=\"http://klab-external-proxy\" ./kesci_submit -file /home/kesci/work/first_zzp/result/sub_lgb_1200epoch.csv -token 02ada54c9760d3e1","execution_count":24},{"metadata":{"id":"8605AAABFB2A4E439D8E612AE8D480A9"},"cell_type":"code","outputs":[],"source":"","execution_count":null}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.6.4","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":0}